@optima-chat/optima-agent 0.8.46 → 0.8.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/.claude/settings.local.json +154 -0
  2. package/.claude/skills/browser/SKILL.md +328 -77
  3. package/dist/bin/bi-cli.js +0 -0
  4. package/dist/bin/comfy.d.ts +3 -0
  5. package/dist/bin/comfy.d.ts.map +1 -0
  6. package/dist/bin/comfy.js +3 -0
  7. package/dist/bin/comfy.js.map +1 -0
  8. package/dist/bin/commerce.js +0 -0
  9. package/dist/bin/google-ads.js +0 -0
  10. package/dist/bin/optima.js +0 -0
  11. package/dist/bin/scout.js +0 -0
  12. package/dist/src/agent.d.ts +1 -1
  13. package/dist/src/hooks-loader.d.ts +6 -0
  14. package/dist/src/hooks-loader.d.ts.map +1 -0
  15. package/dist/src/hooks-loader.js +215 -0
  16. package/dist/src/hooks-loader.js.map +1 -0
  17. package/dist/src/system-prompt.d.ts.map +1 -1
  18. package/dist/src/system-prompt.js +13 -21
  19. package/dist/src/system-prompt.js.map +1 -1
  20. package/dist/src/ui/App.d.ts +6 -0
  21. package/dist/src/ui/App.d.ts.map +1 -0
  22. package/dist/src/ui/App.js +164 -0
  23. package/dist/src/ui/App.js.map +1 -0
  24. package/dist/src/ui/components/Composer.d.ts +10 -0
  25. package/dist/src/ui/components/Composer.d.ts.map +1 -0
  26. package/dist/src/ui/components/Composer.js +13 -0
  27. package/dist/src/ui/components/Composer.js.map +1 -0
  28. package/dist/src/ui/components/Header.d.ts +7 -0
  29. package/dist/src/ui/components/Header.d.ts.map +1 -0
  30. package/dist/src/ui/components/Header.js +7 -0
  31. package/dist/src/ui/components/Header.js.map +1 -0
  32. package/dist/src/ui/components/Message.d.ts +12 -0
  33. package/dist/src/ui/components/Message.d.ts.map +1 -0
  34. package/dist/src/ui/components/Message.js +21 -0
  35. package/dist/src/ui/components/Message.js.map +1 -0
  36. package/dist/src/ui/components/MessageList.d.ts +9 -0
  37. package/dist/src/ui/components/MessageList.d.ts.map +1 -0
  38. package/dist/src/ui/components/MessageList.js +18 -0
  39. package/dist/src/ui/components/MessageList.js.map +1 -0
  40. package/dist/src/ui/components/Spinner.d.ts +6 -0
  41. package/dist/src/ui/components/Spinner.d.ts.map +1 -0
  42. package/dist/src/ui/components/Spinner.js +7 -0
  43. package/dist/src/ui/components/Spinner.js.map +1 -0
  44. package/dist/src/ui/components/StatusBar.d.ts +11 -0
  45. package/dist/src/ui/components/StatusBar.d.ts.map +1 -0
  46. package/dist/src/ui/components/StatusBar.js +7 -0
  47. package/dist/src/ui/components/StatusBar.js.map +1 -0
  48. package/dist/src/ui/components/index.d.ts +7 -0
  49. package/dist/src/ui/components/index.d.ts.map +1 -0
  50. package/dist/src/ui/components/index.js +7 -0
  51. package/dist/src/ui/components/index.js.map +1 -0
  52. package/dist/src/validation/error-formatter.d.ts +21 -0
  53. package/dist/src/validation/error-formatter.d.ts.map +1 -0
  54. package/dist/src/validation/error-formatter.js +98 -0
  55. package/dist/src/validation/error-formatter.js.map +1 -0
  56. package/dist/src/validation/index.d.ts +10 -0
  57. package/dist/src/validation/index.d.ts.map +1 -0
  58. package/dist/src/validation/index.js +10 -0
  59. package/dist/src/validation/index.js.map +1 -0
  60. package/dist/src/validation/json-validator.d.ts +25 -0
  61. package/dist/src/validation/json-validator.d.ts.map +1 -0
  62. package/dist/src/validation/json-validator.js +173 -0
  63. package/dist/src/validation/json-validator.js.map +1 -0
  64. package/dist/src/validation/schema.d.ts +353 -0
  65. package/dist/src/validation/schema.d.ts.map +1 -0
  66. package/dist/src/validation/schema.js +57 -0
  67. package/dist/src/validation/schema.js.map +1 -0
  68. package/dist/src/validation/suggestions.d.ts +25 -0
  69. package/dist/src/validation/suggestions.d.ts.map +1 -0
  70. package/dist/src/validation/suggestions.js +144 -0
  71. package/dist/src/validation/suggestions.js.map +1 -0
  72. package/dist/src/validation/types.d.ts +40 -0
  73. package/dist/src/validation/types.d.ts.map +1 -0
  74. package/dist/src/validation/types.js +5 -0
  75. package/dist/src/validation/types.js.map +1 -0
  76. package/dist/src/validation/yaml-validator.d.ts +25 -0
  77. package/dist/src/validation/yaml-validator.d.ts.map +1 -0
  78. package/dist/src/validation/yaml-validator.js +177 -0
  79. package/dist/src/validation/yaml-validator.js.map +1 -0
  80. package/package.json +1 -1
@@ -0,0 +1,154 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(gh api:*)",
5
+ "WebFetch(domain:platform.claude.com)",
6
+ "Bash(git init:*)",
7
+ "Bash(mkdir:*)",
8
+ "Bash(npm run typecheck:*)",
9
+ "Bash(npm view:*)",
10
+ "WebSearch",
11
+ "Bash(commerce --help)",
12
+ "Bash(done)",
13
+ "Bash(commerce product:*)",
14
+ "Bash(commerce order:*)",
15
+ "Bash(commerce i18n:*)",
16
+ "Bash(google-ads:*)",
17
+ "Bash(scout --help:*)",
18
+ "Bash(tree:*)",
19
+ "Bash(cloc:*)",
20
+ "Bash(npm run build:*)",
21
+ "Bash(git restore:*)",
22
+ "Bash(gh repo view:*)",
23
+ "Bash(mv:*)",
24
+ "Bash(rmdir:*)",
25
+ "Bash(git add:*)",
26
+ "Bash(git commit:*)",
27
+ "Bash(git push)",
28
+ "Bash(timeout 5 npm run optima:*)",
29
+ "Bash(npm install:*)",
30
+ "Bash(cat:*)",
31
+ "Bash(gh issue create:*)",
32
+ "Bash(npx tsx:*)",
33
+ "Bash(timeout 30 npx tsx:*)",
34
+ "Bash(git push origin feature/ask-user-question)",
35
+ "Bash(node:*)",
36
+ "Bash(npm version:*)",
37
+ "Bash(git push:*)",
38
+ "Bash(npm publish:*)",
39
+ "Bash(pkill:*)",
40
+ "Bash(git -C /Users/verypro/optima-agent log --oneline --all -- \".claude/\")",
41
+ "Bash(wc:*)",
42
+ "Bash(grep:*)",
43
+ "Bash(find:*)",
44
+ "Bash(commerce collection --help:*)",
45
+ "Bash(commerce collection update --help:*)",
46
+ "Bash(commerce collection set-cover:*)",
47
+ "Bash(commerce collection get --help:*)",
48
+ "Bash(commerce collection list --help:*)",
49
+ "Bash(commerce collection create --help:*)",
50
+ "Bash(commerce collection remove-products:*)",
51
+ "Bash(commerce collection list-products:*)",
52
+ "Bash(commerce --version:*)",
53
+ "Bash(bi-cli --version:*)",
54
+ "Bash(commerce homepage create --help:*)",
55
+ "Bash(commerce homepage reorder --help:*)",
56
+ "Bash(commerce homepage delete --help:*)",
57
+ "Bash(commerce homepage update-images:*)",
58
+ "Bash(commerce homepage update-collections:*)",
59
+ "Bash(commerce homepage update-target:*)",
60
+ "Bash(commerce homepage switch-template:*)",
61
+ "Bash(commerce inventory:*)",
62
+ "Bash(commerce merchant:*)",
63
+ "Bash(commerce review:*)",
64
+ "Bash(commerce product-page:*)",
65
+ "Bash(bi-cli:*)",
66
+ "Bash(comfy:*)",
67
+ "Bash(scout search:*)",
68
+ "Bash(scout product:*)",
69
+ "Bash(commerce homepage create-collections:*)",
70
+ "Bash(commerce homepage create-featured:*)",
71
+ "Bash(commerce homepage create-collection-products:*)",
72
+ "Bash(commerce homepage create-banner:*)",
73
+ "Bash(xargs -I {} sh -c 'echo \"\"\"\"=== {} ===\"\"\"\"; head -3 /Users/verypro/optima-agent/.claude/skills/{}/SKILL.md | grep \"\"\"\"name:\"\"\"\"')",
74
+ "Bash(ls:*)",
75
+ "Bash(gh issue view:*)",
76
+ "Bash(npx markdownlint-cli:*)",
77
+ "Bash(chmod:*)",
78
+ "Bash(npm whoami:*)",
79
+ "Bash(tsx test-scripts/test-headless-progress.ts:*)",
80
+ "Bash(DEBUG_STREAM=1 node dist/bin/optima.js:*)",
81
+ "Bash(git describe:*)",
82
+ "WebFetch(domain:github.com)",
83
+ "Bash(./scripts/test-headless.sh:*)",
84
+ "Bash(./scripts/test-headless-simple.sh:*)",
85
+ "Bash(env)",
86
+ "Bash(gh pr list:*)",
87
+ "Bash(gh pr view:*)",
88
+ "Bash(gh pr diff:*)",
89
+ "Bash(optima --version:*)",
90
+ "Bash(optima agent headless:*)",
91
+ "Bash(optima headless:*)",
92
+ "Bash(/Users/verypro/optima-agent/scripts/test-headless.sh:*)",
93
+ "Bash(/Users/verypro/optima-agent/scripts/test-headless-simple.sh:*)",
94
+ "Bash(tee:*)",
95
+ "Bash(CONV_ID=\"conv-1\":*)",
96
+ "Bash(echo:*)",
97
+ "Bash(scout tiktok trending --help:*)",
98
+ "Bash(scout tiktok trending:*)",
99
+ "Bash(git checkout:*)",
100
+ "Bash(npm test:*)",
101
+ "Bash(git tag:*)",
102
+ "Bash(/private/tmp/claude/-Users-verypro-optima-agent/68a9ac2c-def2-44e1-b42b-e53bd9022ab6/scratchpad/test-canUseTool.sh)",
103
+ "Bash(optima --help:*)",
104
+ "Bash(npx @optima-chat/ads-cli:*)",
105
+ "Bash(head:*)",
106
+ "Bash(git pull:*)",
107
+ "Bash(pnpm build:*)",
108
+ "Skill(read-code)",
109
+ "Bash(npm run cli:*)",
110
+ "Bash(scout:*)",
111
+ "WebFetch(domain:docs.scrapecreators.com)",
112
+ "WebFetch(domain:scrapecreators.com)",
113
+ "Bash(gh auth status:*)",
114
+ "Bash(optima-agent:*)",
115
+ "Bash(python3:*)",
116
+ "Bash(python3 -c \" import sys content = sys.stdin.read\\(\\) # Find flushMessageQueueSync idx = content.find\\(''flushMessageQueueSync''\\) # Get context around it lines = content.split\\(''\\\\n''\\) for i, line in enumerate\\(lines\\): if ''flushMessageQueueSync'' in line and ''private'' in line: for j in range\\(i, min\\(i+20, len\\(lines\\)\\)\\): print\\(f''{j+1}: {lines[j]}''\\) break \")",
117
+ "Bash(optima:*)",
118
+ "WebFetch(domain:www.npmjs.com)",
119
+ "WebFetch(domain:registry.npmjs.org)",
120
+ "WebFetch(domain:zod.dev)",
121
+ "Bash(npm ls:*)",
122
+ "Bash(NODE_DEBUG=child_process npx tsx:*)",
123
+ "Bash(DEBUG_CLAUDE_AGENT_SDK=1 npx tsx:*)",
124
+ "Bash(CLAUDECODE= npx tsx:*)",
125
+ "Bash(env:*)",
126
+ "Bash(gh release:*)",
127
+ "Bash(npm info:*)",
128
+ "Bash(gh run:*)",
129
+ "Bash(gtimeout 90:*)",
130
+ "Bash(sentinel:*)",
131
+ "Bash(gh pr:*)",
132
+ "Bash(git fetch:*)",
133
+ "Bash(git log:*)",
134
+ "Bash(npm bin:*)",
135
+ "Bash(git status:*)",
136
+ "Bash(browser-cli --version && browser-cli --help)",
137
+ "Bash(browser-cli status:*)",
138
+ "Bash(browser-cli launch:*)",
139
+ "Bash(browser-cli screenshot:*)",
140
+ "Bash(browser-cli --version)",
141
+ "Bash(browser-cli close:*)",
142
+ "Bash(npm update:*)",
143
+ "Bash(node -e \"console.log\\(require\\('./node_modules/@optima-chat/browser-cli/package.json'\\).version\\)\")",
144
+ "WebFetch(domain:raw.githubusercontent.com)",
145
+ "Bash(aws ecs:*)",
146
+ "Bash(aws ecr:*)",
147
+ "Bash(gh search:*)",
148
+ "Bash(npx tsc:*)",
149
+ "Bash(find /Users/verypro/optima-agent -path */node_modules -prune -o -type f \\\\\\(-name *comfy* -o -name *gen* \\\\\\) -print)"
150
+ ],
151
+ "deny": [],
152
+ "ask": []
153
+ }
154
+ }
@@ -17,8 +17,20 @@ description: "Browser automation for web interactions. Use when user needs to op
17
17
  - 简单网页抓取 → 使用 WebFetch
18
18
  - 已有专用 skill 的任务(如 scout、tiktok、instagram、shein 等)
19
19
 
20
+ ## 脚本优先原则
21
+
22
+ 每次执行浏览器任务前,**先检查是否有现成脚本**:
23
+
24
+ 1. `browser-cli scripts` — 查看已有脚本(显示名称、描述、步骤)
25
+ 2. 如果有匹配的脚本 → 直接用 `browser-cli runscript <name> --param ...`
26
+ 3. 如果没有 → 手动探索完成任务,然后生成脚本并提交
27
+
28
+ 脚本执行零 token 消耗,比手动操作快 10 倍以上。
29
+
20
30
  ## 命令
21
31
 
32
+ ### 基础命令
33
+
22
34
  ```bash
23
35
  # 会话管理
24
36
  browser-cli launch [--url <url>] [--force] # 启动浏览器(--force 先关闭已有会话)
@@ -36,6 +48,7 @@ browser-cli input <index> <text> [--no-clear] # 输入文本(--no-clear 追
36
48
  browser-cli select <index> <option> # 选择下拉选项
37
49
  browser-cli scroll <up|down> [--pages <n>] # 滚动页面(默认 1 页)
38
50
  browser-cli keys <key> # 键盘操作(Enter、Tab 等)
51
+ browser-cli upload <index> <file...> # 上传文件到 file input(路径自动转换)
39
52
 
40
53
  # 页面检查
41
54
  browser-cli dom # 获取当前 DOM 状态
@@ -49,6 +62,23 @@ browser-cli switch <tab_id> # 切换标签页
49
62
  browser-cli close-tab <tab_id> # 关闭标签页
50
63
  ```
51
64
 
65
+ ### 脚本命令
66
+
67
+ ```bash
68
+ # 查看与管理
69
+ browser-cli scripts # 列出所有脚本(含描述和步骤)
70
+ browser-cli script-code <name> # 查看脚本源码
71
+ browser-cli delete-script <name> # 删除脚本
72
+
73
+ # 提交
74
+ browser-cli submit-script --name <name> --code-file <path> # 提交脚本(新建或覆盖)
75
+
76
+ # 执行
77
+ browser-cli runscript <name> --param key=value [...] # 执行脚本(自动轮询到完成)
78
+ browser-cli runscript <name> --param key=value --no-poll # 执行但不等待
79
+ browser-cli script-task <task-id> # 查询执行状态
80
+ ```
81
+
52
82
  ## DOM 索引格式
53
83
 
54
84
  每个操作都会返回当前 DOM 状态,格式如下:
@@ -62,105 +92,326 @@ browser-cli close-tab <tab_id> # 关闭标签页
62
92
  - 每次操作后索引会**重新分配** — 务必使用最新的索引
63
93
  - 使用这些索引来执行 `click`、`input` 和 `select` 命令
64
94
 
65
- ## 注意事项
95
+ ## 登录与 Profile
66
96
 
67
- - **DOM 自动返回**:每个操作会自动返回 DOM 状态 — 无需单独调用 `browser-cli dom`
68
- - **避免不必要的截图**:截图消耗 token,日常导航用 DOM 输出即可
69
- - **5 分钟超时**:无操作 5 分钟后会话自动关闭
70
- - **登录提示**:遇到登录页面时,向用户询问账号密码或验证码
71
- - **务必关闭**:操作完成后运行 `browser-cli close` 释放资源
97
+ 浏览器自动为每个用户保持一个持久化的 Chrome Profile。所有平台的登录态(cookies、session)保存在同一个 Profile 中。
98
+
99
+ - **首次登录**:需要用户提供账号和验证码,登录后自动保存
100
+ - **后续使用**:自动恢复登录态,无需重新登录
101
+ - **多平台**:小红书、抖音、微博等登录态共存于同一个 Profile
102
+
103
+ 无需任何参数或命令来管理 Profile,完全自动。
104
+
105
+ ## 文件路径
106
+
107
+ 浏览器后端可以直接读取用户工作空间的文件(通过共享 EFS)。传文件路径时直接使用你看到的路径即可,后端会自动转换:
108
+
109
+ ```
110
+ ~/images/photo.jpg → 自动转换 ✓
111
+ /home/aiuser/images/photo.jpg → 自动转换 ✓
112
+ /mnt/efs/{userId}/images/... → 自动转换 ✓
113
+ ```
114
+
115
+ 不需要上传文件,不需要手动转换路径。
116
+
117
+ ## 操作流程
118
+
119
+ ### 流程一:有脚本(常见)
72
120
 
73
- ## 基本操作示例
121
+ ```bash
122
+ # 1. 检查脚本
123
+ browser-cli scripts
124
+ # 输出:
125
+ # xiaohongshu-image-note — 在小红书创作者平台发布图文笔记
126
+ # [1] navigate — 打开小红书发布页
127
+ # [2] switch_tab — 切换到上传图文模式
128
+ # [3] upload — 上传图片 {images}
129
+ # ...
130
+
131
+ # 2. 直接执行
132
+ browser-cli runscript xiaohongshu-image-note \
133
+ --param title="今日分享" \
134
+ --param body="内容..." \
135
+ --param 'images=["~/images/photo1.jpg", "~/images/photo2.jpg"]'
136
+ # 输出: ✓ Script completed
137
+ ```
138
+
139
+ ### 流程二:无脚本(首次)
74
140
 
75
141
  ```bash
76
- # 1. 启动浏览器并导航
77
- browser-cli launch --url "https://example.com"
142
+ # 1. 检查脚本 — 没有匹配的
143
+ browser-cli scripts
144
+ # 输出: No scripts registered.
145
+
146
+ # 2. 手动探索
147
+ browser-cli launch --url https://creator.xiaohongshu.com
148
+ # → 如果需要登录,向用户询问手机号和验证码
149
+ browser-cli input 11 "18500400810"
150
+ browser-cli click 33 # 发送验证码
151
+ # (用户提供验证码)
152
+ browser-cli input 12 "123456"
153
+ browser-cli click 38 # 登录
154
+ # → 登录成功,继续操作...
155
+ browser-cli dom # 分析页面
156
+ browser-cli click 1880 # 点击"上传图文"
157
+ browser-cli dom # 找到 file input
158
+ browser-cli upload 1092 ~/images/photo.jpg # 上传图片
159
+ browser-cli input 2905 "标题" # 填写标题
160
+ browser-cli input 3432 "正文内容" # 填写正文
161
+ browser-cli click 2814 # 点击发布
162
+ # → 任务完成
163
+
164
+ # 3. 生成 Python 脚本
165
+ # 根据上面的探索轨迹,生成脚本文件,必须包含:
166
+ # - DESCRIPTION: 脚本描述
167
+ # - STEPS: 语义步骤列表(StepDef)
168
+ # - run(ctx): 异步执行函数
169
+ # 写到本地文件
170
+
171
+ # 4. 提交脚本
172
+ browser-cli submit-script \
173
+ --name xiaohongshu-image-note \
174
+ --code-file /tmp/xiaohongshu_image_note.py
175
+
176
+ # 5. 关闭浏览器
177
+ browser-cli close
178
+ ```
78
179
 
79
- # 2. 根据 DOM 索引进行交互
80
- browser-cli click 15 # 点击按钮
81
- browser-cli input 22 "hello" # 输入文本
82
- browser-cli keys Enter # 按回车
180
+ ### 流程三:脚本失败(Fallback)
83
181
 
84
- # 3. 完成后关闭
182
+ ```bash
183
+ # 1. 执行脚本
184
+ browser-cli runscript xiaohongshu-image-note --param ...
185
+ # 输出:
186
+ # ⚠ Script needs fallback
187
+ #
188
+ # ✓ Completed:
189
+ # [1] navigate — 打开小红书发布页
190
+ # [2] switch_tab — 切换到上传图文模式
191
+ #
192
+ # ✗ Failed at step [3] upload: file input not found
193
+ #
194
+ # → Remaining steps:
195
+ # [3] upload — 上传图片 ["~/photo.jpg"]
196
+ # [4] fill_title — 填写标题'测试'
197
+ # [5] fill_body — 填写正文'内容'
198
+ # [6] publish — 点击发布按钮
199
+ # [7] verify — 确认发布成功
200
+ #
201
+ # Session is still open — use browser-cli dom/click/input to continue
202
+
203
+ # 2. 接管完成剩余步骤(Session 仍然打开,不需要重新 launch)
204
+ browser-cli dom # 查看当前页面
205
+ browser-cli click 15 # 继续操作
206
+ browser-cli input 22 "测试"
207
+ browser-cli click 30
208
+ # → 任务完成
209
+
210
+ # 3. (可选) 更新脚本以修复问题
211
+ # 重新生成脚本,覆盖旧版本
212
+ browser-cli submit-script \
213
+ --name xiaohongshu-image-note \
214
+ --code-file /tmp/xiaohongshu_image_note_v2.py
215
+
216
+ # 4. 关闭浏览器
85
217
  browser-cli close
86
218
  ```
87
219
 
88
- ## Workflow(操作录制与回放)
220
+ ## 脚本代码规范
221
+
222
+ ### 基本结构
223
+
224
+ 每个脚本必须包含 `DESCRIPTION`、`STEPS` 和 `async def run(ctx)`:
225
+
226
+ ```python
227
+ """脚本描述。"""
228
+ from __future__ import annotations
229
+ import asyncio
230
+ import json
231
+ from app.scripts.base import ScriptContext, ScriptResult, StepDef, render_steps
232
+
233
+ DESCRIPTION = "一句话描述脚本功能"
234
+
235
+ STEPS = [
236
+ StepDef(id="step_id", description="步骤描述,支持 {param} 占位符"),
237
+ ]
238
+
239
+ PAGE_SETTLE = 2 # 页面操作后等待秒数
240
+
241
+ async def run(ctx: ScriptContext) -> ScriptResult:
242
+ bs = ctx.browser_session
243
+ steps = render_steps(STEPS, ctx.params)
244
+ completed: list[StepDef] = []
245
+
246
+ try:
247
+ # 逐步执行,每完成一步 append
248
+ # ...
249
+ return ScriptResult(status="completed", output={...}, completed_steps=completed)
250
+ except Exception as e:
251
+ remaining = [s for s in steps if s not in completed]
252
+ snapshot = await _capture_snapshot(bs)
253
+ return ScriptResult(
254
+ status="needs_fallback",
255
+ completed_steps=completed,
256
+ remaining_steps=remaining,
257
+ error=str(e),
258
+ snapshot=snapshot,
259
+ )
260
+
261
+ async def _capture_snapshot(bs) -> dict:
262
+ try:
263
+ state = await bs.get_browser_state_summary(include_screenshot=False)
264
+ dom = state.dom_state.llm_representation() if state and state.dom_state else ""
265
+ except Exception:
266
+ dom = ""
267
+ return {"dom": dom}
268
+ ```
89
269
 
90
- 将浏览器操作录制为可复用的 workflow,后续执行时零 token 消耗。
270
+ ### 浏览器操作 API
91
271
 
92
- ### Workflow 命令
272
+ ```python
273
+ # 导航
274
+ action = ctx.action_model(navigate={"url": "https://example.com"})
275
+ await ctx.controller.act(action, bs)
276
+ await asyncio.sleep(PAGE_SETTLE)
93
277
 
94
- ```bash
95
- # 保存与管理
96
- browser-cli save-workflow <name> [--description <desc>] # 保存当前 session 操作为 workflow
97
- browser-cli list-workflows # 列出所有 workflow
98
- browser-cli workflow-info <name> # 查看 workflow 详情(步骤列表)
99
- browser-cli delete-workflow <name> # 删除 workflow
100
-
101
- # 编辑(保存后参数化)
102
- browser-cli edit-workflow <name> --set-var --step <N> --name <varName> # 将步骤 N 的值设为变量
103
- browser-cli edit-workflow <name> --add-input <varName> --type string [--required] # 声明输入变量(type: string/number/bool)
104
- browser-cli edit-workflow <name> --add-step extract --selectors '<json>' [--screenshot] # 添加 extract 步骤
105
- browser-cli edit-workflow <name> --add-step wait_for_element --selector '<css>' [--timeout 30] # 添加等待步骤
106
- browser-cli edit-workflow <name> --remove-step <N> # 删除步骤
107
- browser-cli edit-workflow <name> --update-step <N> --requires-confirmation # 标记敏感操作
108
- browser-cli edit-workflow <name> --update-step <N> --target-text '<text>' # 修改定位文本
109
- browser-cli edit-workflow <name> --auto-clean # 自动清理冗余步骤
110
-
111
- # 执行与监控
112
- browser-cli run-workflow <name> [--param key=value ...] [--force] # 执行(--force 自动关闭已有 session)
113
- browser-cli workflow-status <task_id> # 查询执行状态
114
- browser-cli workflow-input <task_id> --var key=value # 提供变量值
115
- browser-cli workflow-input <task_id> --value <val> # 提供值(仅单变量时)
116
- browser-cli workflow-input <task_id> --confirm # 确认敏感操作
117
- browser-cli workflow-cancel <task_id> # 取消执行
118
-
119
- # 降级接管(workflow 失败后用 --session 继续操作)
120
- browser-cli --session <session_id> dom # 查看降级 session 的页面
121
- browser-cli --session <session_id> click 5 # 在降级 session 上操作
122
- browser-cli --session <session_id> close # 完成后关闭
278
+ # 点击(通过 DOM index)
279
+ action = ctx.action_model(click={"index": idx})
280
+ await ctx.controller.act(action, bs)
281
+
282
+ # 输入文本
283
+ action = ctx.action_model(input={"index": idx, "text": "内容"})
284
+ await ctx.controller.act(action, bs)
285
+
286
+ # 获取当前 DOM(每步操作前刷新)
287
+ state = await bs.get_browser_state_summary(include_screenshot=False, cached=False)
288
+ sm = state.dom_state.selector_map or {}
289
+
290
+ # 获取当前 URL
291
+ page = await bs.get_current_page()
292
+ url = await page.get_url()
123
293
  ```
124
294
 
125
- **注意**:
126
- - `--force` 会自动关闭已有 session 并取消运行中的 task
127
- - 不带 `--force` 时,如果有活跃 session 或运行中 task 会返回 409 错误
295
+ ### 元素查找(语义匹配,不要硬编码 index)
296
+
297
+ DOM index 每次页面变化后会重新分配,脚本中**不能硬编码 index**。
298
+ 必须通过语义属性动态查找:
299
+
300
+ ```python
301
+ # 按文字内容查找(如按钮、标签页)
302
+ def _find_by_text(sm, text, tag=None):
303
+ for idx, node in sm.items():
304
+ node_tag = (getattr(node, "tag_name", "") or "").lower()
305
+ if tag and node_tag != tag:
306
+ continue
307
+ try:
308
+ node_text = node.get_all_children_text().strip() if hasattr(node, "get_all_children_text") else ""
309
+ except Exception:
310
+ node_text = ""
311
+ if text in node_text:
312
+ return idx
313
+ return None
314
+
315
+ # 按 placeholder 查找(如输入框,支持 shadow DOM)
316
+ def _find_by_placeholder(sm, placeholder_contains):
317
+ for idx, node in sm.items():
318
+ attrs = getattr(node, "attributes", {}) or {}
319
+ if placeholder_contains in attrs.get("placeholder", ""):
320
+ return idx
321
+ # Shadow DOM fallback
322
+ if not attrs:
323
+ try:
324
+ llm = node.llm_representation() if hasattr(node, "llm_representation") else ""
325
+ if placeholder_contains in llm:
326
+ return idx
327
+ except Exception:
328
+ pass
329
+ return None
330
+
331
+ # 按 role 查找(如 contenteditable 编辑器)
332
+ def _find_by_role(sm, role, tag=None):
333
+ for idx, node in sm.items():
334
+ node_tag = (getattr(node, "tag_name", "") or "").lower()
335
+ if tag and node_tag != tag:
336
+ continue
337
+ attrs = getattr(node, "attributes", {}) or {}
338
+ if attrs.get("role") == role:
339
+ return idx
340
+ if attrs.get("contenteditable") and role == "textbox":
341
+ return idx
342
+ return None
343
+
344
+ # 按文件类型查找 file input(如图片上传)
345
+ def _find_image_file_input(bs, sm):
346
+ for idx, node in sm.items():
347
+ if not bs.is_file_input(node):
348
+ continue
349
+ attrs = getattr(node, "attributes", {}) or {}
350
+ accept = attrs.get("accept", "")
351
+ if any(ext in accept for ext in [".jpg", ".jpeg", ".png", ".webp"]):
352
+ return idx
353
+ for idx, node in sm.items():
354
+ if bs.is_file_input(node):
355
+ return idx
356
+ return None
357
+ ```
128
358
 
129
- ### 保存 workflow
359
+ ### 文件上传
130
360
 
131
- 当用户完成浏览器操作后说"把这个操作保存下来"或类似意图时:
361
+ 上传文件需要 `FileSystem` 和 `available_file_paths`:
132
362
 
133
- 1. **保存前先查看最终页面 DOM**:`browser-cli dom` — 记住页面结构,为 extract 步骤准备 CSS selector(注意:session 必须还在,未 close)
134
- 2. `browser-cli save-workflow '操作名称'` — 保存操作记录(自动附带 DOM snapshot)。同名 workflow 不会覆盖,需先 `delete-workflow` 再重新保存
135
- 3. `edit-workflow` 进行参数化:
136
- - 识别可变值(搜索词、用户名、密码等),用 `--set-var` 替换为变量占位符
137
- - 用 `--add-input` 声明 input_schema
138
- - 如果 workflow 需要返回数据(如查价格、查状态),根据保存时看到的 DOM 结构添加 extract 步骤(用 CSS selector 指定要提取的字段);纯执行型 workflow(如发帖)可不加 extract
139
- - 对支付、删除等敏感操作,用 `--update-step N --requires-confirmation` 标记
140
- 4. 确认 workflow 可用:`browser-cli workflow-info '操作名称'`
363
+ ```python
364
+ from browser_use.filesystem.file_system import FileSystem
365
+ from app.services.file_upload import UPLOAD_TEMP_BASE
366
+ import secrets as _secrets
141
367
 
142
- ### 执行 workflow
368
+ # 创建临时目录
369
+ fs_dir = UPLOAD_TEMP_BASE / f"script_{_secrets.token_hex(4)}"
370
+ fs_dir.mkdir(parents=True, exist_ok=True)
371
+ file_system = FileSystem(base_dir=str(fs_dir), create_default_files=False)
143
372
 
144
- 当用户说"用之前录的 XX workflow"或类似意图时:
373
+ # 上传每个文件
374
+ images = json.loads(ctx.params.get("images", "[]"))
375
+ file_idx = _find_image_file_input(bs, sm)
376
+ for img_path in images:
377
+ action = ctx.action_model(upload_file={"index": file_idx, "path": img_path})
378
+ await ctx.controller.act(action, bs, available_file_paths=images, file_system=file_system)
379
+ ```
145
380
 
146
- 1. `browser-cli list-workflows` — 确认 workflow 存在
147
- 2. `browser-cli run-workflow '名称' --param key=value` — 启动执行
148
- 3. `browser-cli workflow-status <task_id>` — 每 3-5 秒轮询一次状态
149
- 4. 根据状态处理:
150
- - `needs_input` → 检查输出中的字段:
151
- - 有 `Missing: xxx` → 缺少变量值,向用户询问后用 `workflow-input --var xxx=值` 提供
152
- - 有 `Confirmation: xxx` → 敏感操作待确认,告知用户后用 `workflow-input --confirm` 确认
153
- - `failed` → 用 `--session` flag 接管当前 session 完成任务(见降级接管)
154
- - `completed` → 返回 Output 给用户
381
+ 文件路径由引擎自动转换(~/xxx → /mnt/workspaces/{userId}/xxx),脚本直接用 `ctx.params` 中的路径即可。
155
382
 
156
- ### 降级接管
383
+ ### 等待页面变化
157
384
 
158
- workflow 执行失败后,session 仍然活跃。用 `workflow-status` 返回的 session ID 接管:
385
+ 页面操作后(导航、上传、点击)需要等待,用轮询模式:
159
386
 
160
- ```bash
161
- browser-cli --session <session_id> dom # 查看当前页面
162
- browser-cli --session <session_id> click 5 # 用普通命令完成剩余操作
163
- browser-cli --session <session_id> close # 完成后关闭
387
+ ```python
388
+ # 简单等待
389
+ await asyncio.sleep(PAGE_SETTLE)
390
+
391
+ # 轮询等待某个元素出现(如上传后等编辑模式)
392
+ target_idx = None
393
+ for attempt in range(10):
394
+ await asyncio.sleep(PAGE_SETTLE)
395
+ state = await bs.get_browser_state_summary(include_screenshot=False, cached=False)
396
+ sm = state.dom_state.selector_map or {}
397
+ target_idx = _find_by_placeholder(sm, "填写标题")
398
+ if target_idx is not None:
399
+ break
400
+ if target_idx is None:
401
+ raise RuntimeError("等待超时:目标元素未出现")
164
402
  ```
165
403
 
166
- 不需要重新 `launch`。
404
+ ### 完整示例
405
+
406
+ 参考系统内置脚本 `xiaohongshu-image-note`(通过 `browser-cli script-code xiaohongshu-image-note` 查看完整源码)。
407
+
408
+ ## 注意事项
409
+
410
+ - **脚本优先**:每次操作前先 `browser-cli scripts`,有脚本就用脚本
411
+ - **DOM 自动返回**:每个操作会自动返回 DOM 状态 — 无需单独调用 `browser-cli dom`
412
+ - **避免不必要的截图**:截图消耗 token,日常导航用 DOM 输出即可
413
+ - **5 分钟超时**:无操作 5 分钟后会话自动关闭
414
+ - **登录自动保持**:登录一次后自动持久化,无需重复登录
415
+ - **文件路径自动转换**:直接传你看到的路径(~/xxx),后端自动处理
416
+ - **务必关闭**:操作完成后运行 `browser-cli close` 释放资源
417
+ - **Fallback 后更新脚本**:如果脚本频繁失败,重新生成并 submit-script 覆盖
File without changes
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import "@optima-chat/comfy-cli";
3
+ //# sourceMappingURL=comfy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"comfy.d.ts","sourceRoot":"","sources":["../../bin/comfy.ts"],"names":[],"mappings":";AACA,OAAO,wBAAwB,CAAC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import "@optima-chat/comfy-cli";
3
+ //# sourceMappingURL=comfy.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"comfy.js","sourceRoot":"","sources":["../../bin/comfy.ts"],"names":[],"mappings":";AACA,OAAO,wBAAwB,CAAC"}
File without changes
File without changes
File without changes
package/dist/bin/scout.js CHANGED
File without changes
@@ -11,7 +11,7 @@ export declare class OptimaAgent {
11
11
  * @param prompt 用户输入
12
12
  * @param chatOptions 选项,包括 streamFormat: 'delta' | 'content'
13
13
  */
14
- chat(prompt: string, chatOptions?: ChatOptions): AsyncGenerator<import("@anthropic-ai/claude-agent-sdk").SDKUserMessage | import("@anthropic-ai/claude-agent-sdk").SDKAssistantMessage | import("@anthropic-ai/claude-agent-sdk").SDKResultSuccess | import("@anthropic-ai/claude-agent-sdk").SDKResultError | import("@anthropic-ai/claude-agent-sdk").SDKSystemMessage | import("@anthropic-ai/claude-agent-sdk").SDKPartialAssistantMessage | import("@anthropic-ai/claude-agent-sdk").SDKCompactBoundaryMessage | import("@anthropic-ai/claude-agent-sdk").SDKStatusMessage | import("@anthropic-ai/claude-agent-sdk").SDKLocalCommandOutputMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookStartedMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookResponseMessage | import("@anthropic-ai/claude-agent-sdk").SDKToolProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKAuthStatusMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskNotificationMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskStartedMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKFilesPersistedEvent | import("@anthropic-ai/claude-agent-sdk").SDKToolUseSummaryMessage | import("@anthropic-ai/claude-agent-sdk").SDKRateLimitEvent | import("@anthropic-ai/claude-agent-sdk").SDKElicitationCompleteMessage | import("@anthropic-ai/claude-agent-sdk").SDKPromptSuggestionMessage | {
14
+ chat(prompt: string, chatOptions?: ChatOptions): AsyncGenerator<import("@anthropic-ai/claude-agent-sdk").SDKUserMessage | import("@anthropic-ai/claude-agent-sdk").SDKAssistantMessage | import("@anthropic-ai/claude-agent-sdk").SDKResultSuccess | import("@anthropic-ai/claude-agent-sdk").SDKResultError | import("@anthropic-ai/claude-agent-sdk").SDKSystemMessage | import("@anthropic-ai/claude-agent-sdk").SDKPartialAssistantMessage | import("@anthropic-ai/claude-agent-sdk").SDKCompactBoundaryMessage | import("@anthropic-ai/claude-agent-sdk").SDKStatusMessage | import("@anthropic-ai/claude-agent-sdk").SDKAPIRetryMessage | import("@anthropic-ai/claude-agent-sdk").SDKLocalCommandOutputMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookStartedMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKHookResponseMessage | import("@anthropic-ai/claude-agent-sdk").SDKToolProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKAuthStatusMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskNotificationMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskStartedMessage | import("@anthropic-ai/claude-agent-sdk").SDKTaskProgressMessage | import("@anthropic-ai/claude-agent-sdk").SDKFilesPersistedEvent | import("@anthropic-ai/claude-agent-sdk").SDKToolUseSummaryMessage | import("@anthropic-ai/claude-agent-sdk").SDKRateLimitEvent | import("@anthropic-ai/claude-agent-sdk").SDKElicitationCompleteMessage | import("@anthropic-ai/claude-agent-sdk").SDKPromptSuggestionMessage | {
15
15
  type: "text_delta";
16
16
  delta: {
17
17
  text: string;
@@ -0,0 +1,6 @@
1
+ import type { HooksConfig } from "./types.js";
2
+ /**
3
+ * 从 skills 目录加载所有 skill 的 hooks
4
+ */
5
+ export declare function loadSkillHooks(skillsDir: string, cwd: string, getSessionId: () => string | undefined): HooksConfig;
6
+ //# sourceMappingURL=hooks-loader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hooks-loader.d.ts","sourceRoot":"","sources":["../../src/hooks-loader.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,WAAW,EAAgB,MAAM,YAAY,CAAC;AAwL5D;;GAEG;AACH,wBAAgB,cAAc,CAC5B,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,EACX,YAAY,EAAE,MAAM,MAAM,GAAG,SAAS,GACrC,WAAW,CAiFb"}