remotion-claude-agent-demo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/README.md +160 -0
  2. package/apps/web/README.md +36 -0
  3. package/apps/web/env.example +20 -0
  4. package/apps/web/eslint.config.mjs +18 -0
  5. package/apps/web/next.config.ts +7 -0
  6. package/apps/web/package-lock.json +10348 -0
  7. package/apps/web/package.json +35 -0
  8. package/apps/web/postcss.config.mjs +7 -0
  9. package/apps/web/public/file.svg +1 -0
  10. package/apps/web/public/globe.svg +1 -0
  11. package/apps/web/public/next.svg +1 -0
  12. package/apps/web/public/vercel.svg +1 -0
  13. package/apps/web/public/window.svg +1 -0
  14. package/apps/web/src/app/.well-known/agent-card.json/route.ts +50 -0
  15. package/apps/web/src/app/background-tasks/[jobId]/cancel/route.ts +29 -0
  16. package/apps/web/src/app/events/stream/route.ts +58 -0
  17. package/apps/web/src/app/favicon.ico +0 -0
  18. package/apps/web/src/app/globals.css +174 -0
  19. package/apps/web/src/app/layout.tsx +34 -0
  20. package/apps/web/src/app/messages/answer/route.ts +57 -0
  21. package/apps/web/src/app/messages/stream/route.ts +381 -0
  22. package/apps/web/src/app/page.tsx +358 -0
  23. package/apps/web/src/app/tasks/[taskId]/cancel/route.ts +24 -0
  24. package/apps/web/src/app/tasks/[taskId]/route.ts +24 -0
  25. package/apps/web/src/app/tasks/route.ts +13 -0
  26. package/apps/web/src/components/chat/agent-blocks.tsx +111 -0
  27. package/apps/web/src/components/chat/ask-user-question-panel.tsx +172 -0
  28. package/apps/web/src/components/chat/session-sidebar.tsx +222 -0
  29. package/apps/web/src/components/chat/subagent-activity-sidebar.tsx +248 -0
  30. package/apps/web/src/components/chat/tool-blocks.tsx +550 -0
  31. package/apps/web/src/lib/a2a/activity-store.ts +150 -0
  32. package/apps/web/src/lib/a2a/client.ts +357 -0
  33. package/apps/web/src/lib/a2a/sse.ts +19 -0
  34. package/apps/web/src/lib/a2a/task-store.ts +111 -0
  35. package/apps/web/src/lib/a2a/types.ts +216 -0
  36. package/apps/web/src/lib/agent/answer-store.ts +109 -0
  37. package/apps/web/src/lib/agent/background-delivery.ts +343 -0
  38. package/apps/web/src/lib/agent/background-tool.ts +78 -0
  39. package/apps/web/src/lib/agent/background.ts +452 -0
  40. package/apps/web/src/lib/agent/chat.ts +543 -0
  41. package/apps/web/src/lib/agent/session-store.ts +26 -0
  42. package/apps/web/src/lib/chat/types.ts +44 -0
  43. package/apps/web/src/lib/env.ts +31 -0
  44. package/apps/web/src/lib/hooks/useA2AChat.ts +863 -0
  45. package/apps/web/src/lib/state/chat-atoms.ts +52 -0
  46. package/apps/web/src/lib/workspace.ts +9 -0
  47. package/apps/web/tsconfig.json +35 -0
  48. package/bin/remotion-agent.js +451 -0
  49. package/package.json +34 -0
  50. package/templates/.claude/CLAUDE.md +95 -0
  51. package/templates/.claude/README.md +129 -0
  52. package/templates/.claude/agents/composer-agent.md +188 -0
  53. package/templates/.claude/agents/crafter.md +181 -0
  54. package/templates/.claude/agents/creator.md +134 -0
  55. package/templates/.claude/agents/perceiver.md +92 -0
  56. package/templates/.claude/settings.json +36 -0
  57. package/templates/.claude/settings.local.json +39 -0
  58. package/templates/.claude/skills/agent-browser/SKILL.md +349 -0
  59. package/templates/.claude/skills/agent-browser/references/authentication.md +188 -0
  60. package/templates/.claude/skills/agent-browser/references/proxy-support.md +175 -0
  61. package/templates/.claude/skills/agent-browser/references/session-management.md +181 -0
  62. package/templates/.claude/skills/agent-browser/references/snapshot-refs.md +186 -0
  63. package/templates/.claude/skills/agent-browser/references/video-recording.md +162 -0
  64. package/templates/.claude/skills/agent-browser/templates/authenticated-session.sh +91 -0
  65. package/templates/.claude/skills/agent-browser/templates/capture-workflow.sh +68 -0
  66. package/templates/.claude/skills/agent-browser/templates/form-automation.sh +64 -0
  67. package/templates/.claude/skills/algorithmic-art/LICENSE.txt +202 -0
  68. package/templates/.claude/skills/algorithmic-art/SKILL.md +405 -0
  69. package/templates/.claude/skills/algorithmic-art/templates/generator_template.js +223 -0
  70. package/templates/.claude/skills/algorithmic-art/templates/viewer.html +599 -0
  71. package/templates/.claude/skills/asset-validator/SKILL.md +376 -0
  72. package/templates/.claude/skills/audio-video-sync/SKILL.md +219 -0
  73. package/templates/.claude/skills/bgm-manager/SKILL.md +334 -0
  74. package/templates/.claude/skills/remotion-best-practices/SKILL.md +45 -0
  75. package/templates/.claude/skills/remotion-best-practices/rules/3d.md +86 -0
  76. package/templates/.claude/skills/remotion-best-practices/rules/animations.md +29 -0
  77. package/templates/.claude/skills/remotion-best-practices/rules/assets/charts-bar-chart.tsx +173 -0
  78. package/templates/.claude/skills/remotion-best-practices/rules/assets/text-animations-typewriter.tsx +100 -0
  79. package/templates/.claude/skills/remotion-best-practices/rules/assets/text-animations-word-highlight.tsx +108 -0
  80. package/templates/.claude/skills/remotion-best-practices/rules/assets.md +78 -0
  81. package/templates/.claude/skills/remotion-best-practices/rules/audio.md +172 -0
  82. package/templates/.claude/skills/remotion-best-practices/rules/calculate-metadata.md +104 -0
  83. package/templates/.claude/skills/remotion-best-practices/rules/can-decode.md +75 -0
  84. package/templates/.claude/skills/remotion-best-practices/rules/charts.md +58 -0
  85. package/templates/.claude/skills/remotion-best-practices/rules/compositions.md +141 -0
  86. package/templates/.claude/skills/remotion-best-practices/rules/display-captions.md +126 -0
  87. package/templates/.claude/skills/remotion-best-practices/rules/extract-frames.md +229 -0
  88. package/templates/.claude/skills/remotion-best-practices/rules/fonts.md +152 -0
  89. package/templates/.claude/skills/remotion-best-practices/rules/get-audio-duration.md +58 -0
  90. package/templates/.claude/skills/remotion-best-practices/rules/get-video-dimensions.md +68 -0
  91. package/templates/.claude/skills/remotion-best-practices/rules/get-video-duration.md +58 -0
  92. package/templates/.claude/skills/remotion-best-practices/rules/gifs.md +138 -0
  93. package/templates/.claude/skills/remotion-best-practices/rules/images.md +130 -0
  94. package/templates/.claude/skills/remotion-best-practices/rules/import-srt-captions.md +67 -0
  95. package/templates/.claude/skills/remotion-best-practices/rules/lottie.md +68 -0
  96. package/templates/.claude/skills/remotion-best-practices/rules/maps.md +403 -0
  97. package/templates/.claude/skills/remotion-best-practices/rules/measuring-dom-nodes.md +35 -0
  98. package/templates/.claude/skills/remotion-best-practices/rules/measuring-text.md +143 -0
  99. package/templates/.claude/skills/remotion-best-practices/rules/parameters.md +98 -0
  100. package/templates/.claude/skills/remotion-best-practices/rules/sequencing.md +118 -0
  101. package/templates/.claude/skills/remotion-best-practices/rules/tailwind.md +11 -0
  102. package/templates/.claude/skills/remotion-best-practices/rules/text-animations.md +20 -0
  103. package/templates/.claude/skills/remotion-best-practices/rules/timing.md +179 -0
  104. package/templates/.claude/skills/remotion-best-practices/rules/transcribe-captions.md +19 -0
  105. package/templates/.claude/skills/remotion-best-practices/rules/transitions.md +122 -0
  106. package/templates/.claude/skills/remotion-best-practices/rules/trimming.md +53 -0
  107. package/templates/.claude/skills/remotion-best-practices/rules/videos.md +171 -0
  108. package/templates/.claude/skills/remotion-components/SKILL.md +453 -0
  109. package/templates/.claude/skills/render-config/SKILL.md +290 -0
  110. package/templates/.claude/skills/script-writer/SKILL.md +59 -0
  111. package/templates/.claude/skills/style-director/script-writer/SKILL.md +82 -0
  112. package/templates/.claude/skills/style-director/style-director/SKILL.md +287 -0
  113. package/templates/.claude/skills/style-director/style-director/references/audience-and-scenarios.md +43 -0
  114. package/templates/.claude/skills/style-director/style-director/references/interaction-innovation.md +26 -0
  115. package/templates/.claude/skills/style-director/style-director/references/motion-grammar.md +66 -0
  116. package/templates/.claude/skills/style-director/style-director/references/quality-checklist.md +29 -0
  117. package/templates/.claude/skills/style-director/style-director/references/scene-recipes.md +38 -0
  118. package/templates/.claude/skills/style-director/style-director/references/visual-style-system.md +148 -0
  119. package/templates/.claude/skills/subtitle-composer/SKILL.md +304 -0
  120. package/templates/.claude/skills/subtitle-processor/SKILL.md +308 -0
  121. package/templates/.claude/skills/timeline-generator/SKILL.md +253 -0
  122. package/templates/.claude/skills/video-preflight-check/SKILL.md +353 -0
  123. package/templates/.claude/skills/voice-synthesizer/SKILL.md +296 -0
  124. package/templates/.claude/skills/voice-synthesizer/scripts/synthesize_voice.py +315 -0
  125. package/templates/.claude/skills/voice-synthesizer/scripts/tts_cli.py +142 -0
  126. package/templates/.claude/skills/web-design-guidelines/SKILL.md +36 -0
  127. package/templates/.claude/skills/youtube-downloader/SKILL.md +99 -0
  128. package/templates/.claude/skills/youtube-downloader/scripts/download_video.py +145 -0
@@ -0,0 +1,92 @@
1
+ ---
2
+ name: perceiver
3
+ description: 浏览器感知与录屏代理。使用 agent-browser CLI 访问网页、分析内容、录制操作演示。
4
+ skills:
5
+ - agent-browser
6
+ ---
7
+
8
+ 你是感知分析代理。通过浏览器能力获取页面信息或录制操作演示,输出结构化的内容理解。
9
+
10
+ ## 核心能力
11
+
12
+ 1. **网页感知** - 访问 URL,分析页面结构、功能、关键元素
13
+ 2. **操作录制** - 录制浏览器操作演示,标记关键步骤
14
+ 3. **内容提取** - 提取文本、截图、交互元素信息
15
+
16
+ ## agent-browser 核心命令
17
+
18
+ ```bash
19
+ # 导航
20
+ agent-browser open <url> # 打开页面
21
+ agent-browser back / forward # 后退 / 前进
22
+ agent-browser reload # 刷新
23
+
24
+ # 页面分析
25
+ agent-browser snapshot -i # 获取交互元素 (返回 @e1, @e2 等 ref)
26
+ agent-browser screenshot [path] # 截图
27
+ agent-browser get text @e1 # 获取元素文本
28
+ agent-browser get title # 获取页面标题
29
+ agent-browser get url # 获取当前 URL
30
+
31
+ # 交互操作
32
+ agent-browser click @e1 # 点击元素
33
+ agent-browser fill @e2 "text" # 填充输入框
34
+ agent-browser type @e2 "text" # 追加输入
35
+ agent-browser hover @e1 # 悬停
36
+ agent-browser scroll down 500 # 滚动
37
+
38
+ # 等待
39
+ agent-browser wait @e1 # 等待元素出现
40
+ agent-browser wait 2000 # 等待毫秒
41
+ agent-browser wait --load networkidle # 等待网络空闲
42
+
43
+ # 录制
44
+ agent-browser record start ./demo.webm # 开始录制
45
+ agent-browser record stop # 停止录制
46
+
47
+ # 关闭
48
+ agent-browser close # 关闭浏览器
49
+ ```
50
+
51
+ ## 执行步骤
52
+
53
+ 1. 确认输入类型(URL、录屏任务、页面状态)
54
+ 2. 使用 agent-browser CLI:
55
+ - `agent-browser open <url>` - 访问页面
56
+ - `agent-browser snapshot -i` - 获取页面结构与可交互元素
57
+ - `agent-browser screenshot` - 截取屏幕
58
+ - `agent-browser click/fill/type @ref` - 执行操作
59
+ 3. 提取关键内容:标题、功能点、操作流程、UI 元素
60
+ 4. 标记关键时刻与建议的视觉元素
61
+ 5. 输出 `content_understanding`
62
+
63
+ ## 输出格式
64
+
65
+ ```yaml
66
+ content_understanding:
67
+ type: "webpage" | "recording" | "analysis"
68
+ summary: string
69
+
70
+ key_steps:
71
+ - step: string
72
+ screenshot?: string
73
+ timestamp?: number
74
+
75
+ key_moments:
76
+ - description: string
77
+ visual_cue: string
78
+
79
+ ui_elements:
80
+ - type: "button" | "input" | "menu" | "modal"
81
+ description: string
82
+ importance: "high" | "medium" | "low"
83
+
84
+ suggestions:
85
+ - string
86
+ ```
87
+
88
+ ## 约束
89
+
90
+ - 只做分析与结构化输出,不写脚本或分镜
91
+ - 录屏时考虑后期剪辑需求,标记关键操作点
92
+ - 对 SaaS 产品重点关注:核心功能、用户价值、操作流程
@@ -0,0 +1,36 @@
1
+ {
2
+ "project": {
3
+ "name": "视频创作系统",
4
+ "description": "专注于产品教程、Demo演示、SaaS介绍的智能视频创作",
5
+ "version": "1.0.0"
6
+ },
7
+ "agents": {
8
+ "perceiver": {
9
+ "path": "./agents/perceiver.md",
10
+ "description": "感知Agent - 浏览器访问、内容分析、操作录制"
11
+ },
12
+ "creator": {
13
+ "path": "./agents/creator.md",
14
+ "description": "创造Agent - 脚本编写、分镜设计、风格配置"
15
+ },
16
+ "crafter": {
17
+ "path": "./agents/crafter.md",
18
+ "description": "执行Agent - 语音合成、素材下载、录屏调度"
19
+ },
20
+ "composer-agent": {
21
+ "path": "./agents/composer-agent.md",
22
+ "description": "合成Agent - Remotion项目生成与渲染"
23
+ }
24
+ },
25
+ "skills": {
26
+ "creation": ["script-writer", "style-director"],
27
+ "execution": ["voice-synthesizer", "youtube-downloader", "agent-browser"],
28
+ "composition": ["remotion-best-practices"]
29
+ },
30
+ "tools": {
31
+ "agent-browser": {
32
+ "url": "https://agent-browser.dev/",
33
+ "description": "浏览器自动化CLI,用于网页访问和录屏"
34
+ }
35
+ }
36
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebFetch(domain:agent-browser.dev)",
5
+ "WebSearch",
6
+ "WebFetch(domain:github.com)",
7
+ "WebFetch(domain:ai.google.dev)",
8
+ "Bash(xargs:*)",
9
+ "Bash(claude skill install:*)",
10
+ "Bash(claude skill --help:*)",
11
+ "Bash(claude install --help:*)",
12
+ "WebFetch(domain:raw.githubusercontent.com)",
13
+ "Bash(git clone:*)",
14
+ "Bash(ls:*)",
15
+ "WebFetch(domain:docs.anthropic.com)",
16
+ "Bash(edge-tts:*)",
17
+ "Bash(npm init -y)",
18
+ "Bash(npm install:*)",
19
+ "Bash(npx remotion render:*)",
20
+ "Bash(ffprobe:*)",
21
+ "WebFetch(domain:www.anthropic.com)",
22
+ "WebFetch(domain:www.datacamp.com)",
23
+ "WebFetch(domain:nader.substack.com)",
24
+ "Bash(npx tsx:*)",
25
+ "Bash(npx tsc:*)",
26
+ "Bash(npx eslint:*)",
27
+ "Bash(test:*)",
28
+ "Bash(python3:*)",
29
+ "Bash(chmod:*)",
30
+ "Bash(pip3 install:*)",
31
+ "Bash(curl:*)",
32
+ "WebFetch(domain:mp.weixin.qq.com)",
33
+ "Bash(node:*)",
34
+ "Bash(npm --version:*)",
35
+ "Bash(npm run render)",
36
+ "Bash(mediainfo:*)"
37
+ ]
38
+ }
39
+ }
@@ -0,0 +1,349 @@
1
+ ---
2
+ name: agent-browser
3
+ description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
4
+ allowed-tools: Bash(agent-browser:*)
5
+ ---
6
+
7
+ # Browser Automation with agent-browser
8
+
9
+ ## Quick start
10
+
11
+ ```bash
12
+ agent-browser open <url> # Navigate to page
13
+ agent-browser snapshot -i # Get interactive elements with refs
14
+ agent-browser click @e1 # Click element by ref
15
+ agent-browser fill @e2 "text" # Fill input by ref
16
+ agent-browser close # Close browser
17
+ ```
18
+
19
+ ## Core workflow
20
+
21
+ 1. Navigate: `agent-browser open <url>`
22
+ 2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
23
+ 3. Interact using refs from the snapshot
24
+ 4. Re-snapshot after navigation or significant DOM changes
25
+
26
+ ## Commands
27
+
28
+ ### Navigation
29
+
30
+ ```bash
31
+ agent-browser open <url> # Navigate to URL (aliases: goto, navigate)
32
+ # Supports: https://, http://, file://, about:, data://
33
+ # Auto-prepends https:// if no protocol given
34
+ agent-browser back # Go back
35
+ agent-browser forward # Go forward
36
+ agent-browser reload # Reload page
37
+ agent-browser close # Close browser (aliases: quit, exit)
38
+ agent-browser connect 9222 # Connect to browser via CDP port
39
+ ```
40
+
41
+ ### Snapshot (page analysis)
42
+
43
+ ```bash
44
+ agent-browser snapshot # Full accessibility tree
45
+ agent-browser snapshot -i # Interactive elements only (recommended)
46
+ agent-browser snapshot -c # Compact output
47
+ agent-browser snapshot -d 3 # Limit depth to 3
48
+ agent-browser snapshot -s "#main" # Scope to CSS selector
49
+ ```
50
+
51
+ ### Interactions (use @refs from snapshot)
52
+
53
+ ```bash
54
+ agent-browser click @e1 # Click
55
+ agent-browser dblclick @e1 # Double-click
56
+ agent-browser focus @e1 # Focus element
57
+ agent-browser fill @e2 "text" # Clear and type
58
+ agent-browser type @e2 "text" # Type without clearing
59
+ agent-browser press Enter # Press key (alias: key)
60
+ agent-browser press Control+a # Key combination
61
+ agent-browser keydown Shift # Hold key down
62
+ agent-browser keyup Shift # Release key
63
+ agent-browser hover @e1 # Hover
64
+ agent-browser check @e1 # Check checkbox
65
+ agent-browser uncheck @e1 # Uncheck checkbox
66
+ agent-browser select @e1 "value" # Select dropdown option
67
+ agent-browser select @e1 "a" "b" # Select multiple options
68
+ agent-browser scroll down 500 # Scroll page (default: down 300px)
69
+ agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto)
70
+ agent-browser drag @e1 @e2 # Drag and drop
71
+ agent-browser upload @e1 file.pdf # Upload files
72
+ ```
73
+
74
+ ### Get information
75
+
76
+ ```bash
77
+ agent-browser get text @e1 # Get element text
78
+ agent-browser get html @e1 # Get innerHTML
79
+ agent-browser get value @e1 # Get input value
80
+ agent-browser get attr @e1 href # Get attribute
81
+ agent-browser get title # Get page title
82
+ agent-browser get url # Get current URL
83
+ agent-browser get count ".item" # Count matching elements
84
+ agent-browser get box @e1 # Get bounding box
85
+ agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.)
86
+ ```
87
+
88
+ ### Check state
89
+
90
+ ```bash
91
+ agent-browser is visible @e1 # Check if visible
92
+ agent-browser is enabled @e1 # Check if enabled
93
+ agent-browser is checked @e1 # Check if checked
94
+ ```
95
+
96
+ ### Screenshots & PDF
97
+
98
+ ```bash
99
+ agent-browser screenshot # Screenshot to stdout
100
+ agent-browser screenshot path.png # Save to file
101
+ agent-browser screenshot --full # Full page
102
+ agent-browser pdf output.pdf # Save as PDF
103
+ ```
104
+
105
+ ### Video recording
106
+
107
+ ```bash
108
+ agent-browser record start ./demo.webm # Start recording (uses current URL + state)
109
+ agent-browser click @e1 # Perform actions
110
+ agent-browser record stop # Stop and save video
111
+ agent-browser record restart ./take2.webm # Stop current + start new recording
112
+ ```
113
+
114
+ Recording creates a fresh context but preserves cookies/storage from your session. If no URL is provided, it
115
+ automatically returns to your current page. For smooth demos, explore first, then start recording.
116
+
117
+ ### Wait
118
+
119
+ ```bash
120
+ agent-browser wait @e1 # Wait for element
121
+ agent-browser wait 2000 # Wait milliseconds
122
+ agent-browser wait --text "Success" # Wait for text (or -t)
123
+ agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u)
124
+ agent-browser wait --load networkidle # Wait for network idle (or -l)
125
+ agent-browser wait --fn "window.ready" # Wait for JS condition (or -f)
126
+ ```
127
+
128
+ ### Mouse control
129
+
130
+ ```bash
131
+ agent-browser mouse move 100 200 # Move mouse
132
+ agent-browser mouse down left # Press button
133
+ agent-browser mouse up left # Release button
134
+ agent-browser mouse wheel 100 # Scroll wheel
135
+ ```
136
+
137
+ ### Semantic locators (alternative to refs)
138
+
139
+ ```bash
140
+ agent-browser find role button click --name "Submit"
141
+ agent-browser find text "Sign In" click
142
+ agent-browser find text "Sign In" click --exact # Exact match only
143
+ agent-browser find label "Email" fill "user@test.com"
144
+ agent-browser find placeholder "Search" type "query"
145
+ agent-browser find alt "Logo" click
146
+ agent-browser find title "Close" click
147
+ agent-browser find testid "submit-btn" click
148
+ agent-browser find first ".item" click
149
+ agent-browser find last ".item" click
150
+ agent-browser find nth 2 "a" hover
151
+ ```
152
+
153
+ ### Browser settings
154
+
155
+ ```bash
156
+ agent-browser set viewport 1920 1080 # Set viewport size
157
+ agent-browser set device "iPhone 14" # Emulate device
158
+ agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation)
159
+ agent-browser set offline on # Toggle offline mode
160
+ agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
161
+ agent-browser set credentials user pass # HTTP basic auth (alias: auth)
162
+ agent-browser set media dark # Emulate color scheme
163
+ agent-browser set media light reduced-motion # Light mode + reduced motion
164
+ ```
165
+
166
+ ### Cookies & Storage
167
+
168
+ ```bash
169
+ agent-browser cookies # Get all cookies
170
+ agent-browser cookies set name value # Set cookie
171
+ agent-browser cookies clear # Clear cookies
172
+ agent-browser storage local # Get all localStorage
173
+ agent-browser storage local key # Get specific key
174
+ agent-browser storage local set k v # Set value
175
+ agent-browser storage local clear # Clear all
176
+ ```
177
+
178
+ ### Network
179
+
180
+ ```bash
181
+ agent-browser network route <url> # Intercept requests
182
+ agent-browser network route <url> --abort # Block requests
183
+ agent-browser network route <url> --body '{}' # Mock response
184
+ agent-browser network unroute [url] # Remove routes
185
+ agent-browser network requests # View tracked requests
186
+ agent-browser network requests --filter api # Filter requests
187
+ ```
188
+
189
+ ### Tabs & Windows
190
+
191
+ ```bash
192
+ agent-browser tab # List tabs
193
+ agent-browser tab new [url] # New tab
194
+ agent-browser tab 2 # Switch to tab by index
195
+ agent-browser tab close # Close current tab
196
+ agent-browser tab close 2 # Close tab by index
197
+ agent-browser window new # New window
198
+ ```
199
+
200
+ ### Frames
201
+
202
+ ```bash
203
+ agent-browser frame "#iframe" # Switch to iframe
204
+ agent-browser frame main # Back to main frame
205
+ ```
206
+
207
+ ### Dialogs
208
+
209
+ ```bash
210
+ agent-browser dialog accept [text] # Accept dialog
211
+ agent-browser dialog dismiss # Dismiss dialog
212
+ ```
213
+
214
+ ### JavaScript
215
+
216
+ ```bash
217
+ agent-browser eval "document.title" # Run JavaScript
218
+ ```
219
+
220
+ ## Global options
221
+
222
+ ```bash
223
+ agent-browser --session <name> ... # Isolated browser session
224
+ agent-browser --json ... # JSON output for parsing
225
+ agent-browser --headed ... # Show browser window (not headless)
226
+ agent-browser --full ... # Full page screenshot (-f)
227
+ agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
228
+ agent-browser -p <provider> ... # Cloud browser provider (--provider)
229
+ agent-browser --proxy <url> ... # Use proxy server
230
+ agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
231
+ agent-browser --executable-path <p> # Custom browser executable
232
+ agent-browser --extension <path> ... # Load browser extension (repeatable)
233
+ agent-browser --help # Show help (-h)
234
+ agent-browser --version # Show version (-V)
235
+ agent-browser <command> --help # Show detailed help for a command
236
+ ```
237
+
238
+ ### Proxy support
239
+
240
+ ```bash
241
+ agent-browser --proxy http://proxy.com:8080 open example.com
242
+ agent-browser --proxy http://user:pass@proxy.com:8080 open example.com
243
+ agent-browser --proxy socks5://proxy.com:1080 open example.com
244
+ ```
245
+
246
+ ## Environment variables
247
+
248
+ ```bash
249
+ AGENT_BROWSER_SESSION="mysession" # Default session name
250
+ AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path
251
+ AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths
252
+ AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider
253
+ AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
254
+ AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js)
255
+ ```
256
+
257
+ ## Example: Form submission
258
+
259
+ ```bash
260
+ agent-browser open https://example.com/form
261
+ agent-browser snapshot -i
262
+ # Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
263
+
264
+ agent-browser fill @e1 "user@example.com"
265
+ agent-browser fill @e2 "password123"
266
+ agent-browser click @e3
267
+ agent-browser wait --load networkidle
268
+ agent-browser snapshot -i # Check result
269
+ ```
270
+
271
+ ## Example: Authentication with saved state
272
+
273
+ ```bash
274
+ # Login once
275
+ agent-browser open https://app.example.com/login
276
+ agent-browser snapshot -i
277
+ agent-browser fill @e1 "username"
278
+ agent-browser fill @e2 "password"
279
+ agent-browser click @e3
280
+ agent-browser wait --url "**/dashboard"
281
+ agent-browser state save auth.json
282
+
283
+ # Later sessions: load saved state
284
+ agent-browser state load auth.json
285
+ agent-browser open https://app.example.com/dashboard
286
+ ```
287
+
288
+ ## Sessions (parallel browsers)
289
+
290
+ ```bash
291
+ agent-browser --session test1 open site-a.com
292
+ agent-browser --session test2 open site-b.com
293
+ agent-browser session list
294
+ ```
295
+
296
+ ## JSON output (for parsing)
297
+
298
+ Add `--json` for machine-readable output:
299
+
300
+ ```bash
301
+ agent-browser snapshot -i --json
302
+ agent-browser get text @e1 --json
303
+ ```
304
+
305
+ ## Debugging
306
+
307
+ ```bash
308
+ agent-browser --headed open example.com # Show browser window
309
+ agent-browser --cdp 9222 snapshot # Connect via CDP port
310
+ agent-browser connect 9222 # Alternative: connect command
311
+ agent-browser console # View console messages
312
+ agent-browser console --clear # Clear console
313
+ agent-browser errors # View page errors
314
+ agent-browser errors --clear # Clear errors
315
+ agent-browser highlight @e1 # Highlight element
316
+ agent-browser trace start # Start recording trace
317
+ agent-browser trace stop trace.zip # Stop and save trace
318
+ agent-browser record start ./debug.webm # Record video from current page
319
+ agent-browser record stop # Save recording
320
+ ```
321
+
322
+ ## Deep-dive documentation
323
+
324
+ For detailed patterns and best practices, see:
325
+
326
+ | Reference | Description |
327
+ |-----------|-------------|
328
+ | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
329
+ | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
330
+ | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
331
+ | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
332
+ | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
333
+
334
+ ## Ready-to-use templates
335
+
336
+ Executable workflow scripts for common patterns:
337
+
338
+ | Template | Description |
339
+ |----------|-------------|
340
+ | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
341
+ | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
342
+ | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
343
+
344
+ Usage:
345
+ ```bash
346
+ ./templates/form-automation.sh https://example.com/form
347
+ ./templates/authenticated-session.sh https://app.example.com/login
348
+ ./templates/capture-workflow.sh https://example.com ./output
349
+ ```
@@ -0,0 +1,188 @@
1
+ # Authentication Patterns
2
+
3
+ Patterns for handling login flows, session persistence, and authenticated browsing.
4
+
5
+ ## Basic Login Flow
6
+
7
+ ```bash
8
+ # Navigate to login page
9
+ agent-browser open https://app.example.com/login
10
+ agent-browser wait --load networkidle
11
+
12
+ # Get form elements
13
+ agent-browser snapshot -i
14
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
15
+
16
+ # Fill credentials
17
+ agent-browser fill @e1 "user@example.com"
18
+ agent-browser fill @e2 "password123"
19
+
20
+ # Submit
21
+ agent-browser click @e3
22
+ agent-browser wait --load networkidle
23
+
24
+ # Verify login succeeded
25
+ agent-browser get url # Should be dashboard, not login
26
+ ```
27
+
28
+ ## Saving Authentication State
29
+
30
+ After logging in, save state for reuse:
31
+
32
+ ```bash
33
+ # Login first (see above)
34
+ agent-browser open https://app.example.com/login
35
+ agent-browser snapshot -i
36
+ agent-browser fill @e1 "user@example.com"
37
+ agent-browser fill @e2 "password123"
38
+ agent-browser click @e3
39
+ agent-browser wait --url "**/dashboard"
40
+
41
+ # Save authenticated state
42
+ agent-browser state save ./auth-state.json
43
+ ```
44
+
45
+ ## Restoring Authentication
46
+
47
+ Skip login by loading saved state:
48
+
49
+ ```bash
50
+ # Load saved auth state
51
+ agent-browser state load ./auth-state.json
52
+
53
+ # Navigate directly to protected page
54
+ agent-browser open https://app.example.com/dashboard
55
+
56
+ # Verify authenticated
57
+ agent-browser snapshot -i
58
+ ```
59
+
60
+ ## OAuth / SSO Flows
61
+
62
+ For OAuth redirects:
63
+
64
+ ```bash
65
+ # Start OAuth flow
66
+ agent-browser open https://app.example.com/auth/google
67
+
68
+ # Handle redirects automatically
69
+ agent-browser wait --url "**/accounts.google.com**"
70
+ agent-browser snapshot -i
71
+
72
+ # Fill Google credentials
73
+ agent-browser fill @e1 "user@gmail.com"
74
+ agent-browser click @e2 # Next button
75
+ agent-browser wait 2000
76
+ agent-browser snapshot -i
77
+ agent-browser fill @e3 "password"
78
+ agent-browser click @e4 # Sign in
79
+
80
+ # Wait for redirect back
81
+ agent-browser wait --url "**/app.example.com**"
82
+ agent-browser state save ./oauth-state.json
83
+ ```
84
+
85
+ ## Two-Factor Authentication
86
+
87
+ Handle 2FA with manual intervention:
88
+
89
+ ```bash
90
+ # Login with credentials
91
+ agent-browser open https://app.example.com/login --headed # Show browser
92
+ agent-browser snapshot -i
93
+ agent-browser fill @e1 "user@example.com"
94
+ agent-browser fill @e2 "password123"
95
+ agent-browser click @e3
96
+
97
+ # Wait for user to complete 2FA manually
98
+ echo "Complete 2FA in the browser window..."
99
+ agent-browser wait --url "**/dashboard" --timeout 120000
100
+
101
+ # Save state after 2FA
102
+ agent-browser state save ./2fa-state.json
103
+ ```
104
+
105
+ ## HTTP Basic Auth
106
+
107
+ For sites using HTTP Basic Authentication:
108
+
109
+ ```bash
110
+ # Set credentials before navigation
111
+ agent-browser set credentials username password
112
+
113
+ # Navigate to protected resource
114
+ agent-browser open https://protected.example.com/api
115
+ ```
116
+
117
+ ## Cookie-Based Auth
118
+
119
+ Manually set authentication cookies:
120
+
121
+ ```bash
122
+ # Set auth cookie
123
+ agent-browser cookies set session_token "abc123xyz"
124
+
125
+ # Navigate to protected page
126
+ agent-browser open https://app.example.com/dashboard
127
+ ```
128
+
129
+ ## Token Refresh Handling
130
+
131
+ For sessions with expiring tokens:
132
+
133
+ ```bash
134
+ #!/bin/bash
135
+ # Wrapper that handles token refresh
136
+
137
+ STATE_FILE="./auth-state.json"
138
+
139
+ # Try loading existing state
140
+ if [[ -f "$STATE_FILE" ]]; then
141
+ agent-browser state load "$STATE_FILE"
142
+ agent-browser open https://app.example.com/dashboard
143
+
144
+ # Check if session is still valid
145
+ URL=$(agent-browser get url)
146
+ if [[ "$URL" == *"/login"* ]]; then
147
+ echo "Session expired, re-authenticating..."
148
+ # Perform fresh login
149
+ agent-browser snapshot -i
150
+ agent-browser fill @e1 "$USERNAME"
151
+ agent-browser fill @e2 "$PASSWORD"
152
+ agent-browser click @e3
153
+ agent-browser wait --url "**/dashboard"
154
+ agent-browser state save "$STATE_FILE"
155
+ fi
156
+ else
157
+ # First-time login
158
+ agent-browser open https://app.example.com/login
159
+ # ... login flow, then persist it: agent-browser state save "$STATE_FILE" ...
160
+ fi
161
+ ```
162
+
163
+ ## Security Best Practices
164
+
165
+ 1. **Never commit state files** - They contain session tokens
166
+ ```bash
167
+ echo "*-state.json" >> .gitignore
168
+ ```
169
+
170
+ 2. **Use environment variables for credentials**
171
+ ```bash
172
+ agent-browser fill @e1 "$APP_USERNAME"
173
+ agent-browser fill @e2 "$APP_PASSWORD"
174
+ ```
175
+
176
+ 3. **Clean up after automation**
177
+ ```bash
178
+ agent-browser cookies clear
179
+ rm -f ./auth-state.json
180
+ ```
181
+
182
+ 4. **Use short-lived sessions for CI/CD**
183
+ ```bash
184
+ # Don't persist state in CI
185
+ agent-browser open https://app.example.com/login
186
+ # ... login and perform actions ...
187
+ agent-browser close # Session ends, nothing persisted
188
+ ```