agentpage 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # AutoPilot
2
2
 
3
+ <p align="center">
4
+ <img src="./assets/logo/contours%20(2).svg" alt="AutoPilot Logo" width="180" />
5
+ </p>
6
+
3
7
  > 浏览器内嵌 AI Agent SDK:让 AI 通过 tool-calling 操作网页。
4
8
 
5
9
  > 核心主张:通过 **Prompt + Tools + 路由**,快速为网站实现 AI 赋能,并构建**前端运行时 AI Skill**。AutoPilot 本质上是一个运行在前端浏览器中的 AI Agent。
@@ -145,8 +149,8 @@ import { WebAgent } from "agentpage";
145
149
 
146
150
  const agent = new WebAgent({
147
151
  token: "your-api-key",
148
- provider: "deepseek", // openai | copilot | anthropic | deepseek
149
- model: "deepseek-chat",
152
+ provider: "doubao", // openai | copilot | anthropic | deepseek | doubao | qwen
153
+ model: "doubao-1.5-pro-32k",
150
154
  // 用户可自定义 Prompt 规则(项目级/路由级)
151
155
  systemPrompt: "You are an assistant for this route. Follow route safety constraints.",
152
156
  memory: true,
@@ -227,9 +231,9 @@ applyRouteSkill(location.pathname);
227
231
  | 参数 | 类型 | 默认值 | 说明 |
228
232
  | --- | --- | --- | --- |
229
233
  | `client` | `AIClient` | - | 自定义 AI 客户端;传入后优先使用该实例,忽略 token/provider/model/baseURL |
230
- | `token` | `string` | `""` | API Token(GitHub PAT / OpenAI API Key / Anthropic Key / DeepSeek Key) |
231
- | `provider` | `string` | `"copilot"` | AI 服务商:`copilot` / `openai` / `anthropic` / `deepseek` |
232
- | `model` | `string` | `"gpt-4o"` | 模型名称(需与 provider 匹配,如 `deepseek-chat`、`claude-sonnet-4-20250514`) |
234
+ | `token` | `string` | `""` | API Token(GitHub PAT / OpenAI API Key / Anthropic Key / DeepSeek Key / Doubao Ark Key / DashScope Key) |
235
+ | `provider` | `string` | `"copilot"` | AI 服务商:`copilot` / `openai` / `anthropic` / `deepseek` / `doubao` / `qwen` |
236
+ | `model` | `string` | `"gpt-4o"` | 模型名称(需与 provider 匹配,如 `doubao-1.5-pro-32k`、`qwen-plus`、`deepseek-chat`) |
233
237
  | `baseURL` | `string` | - | 自定义 API 基础地址(用于代理/私有部署,覆盖 provider 默认端点) |
234
238
  | `stream` | `boolean` | `true` | 是否启用流式返回(SSE);关闭后使用 JSON 非流式响应 |
235
239
  | `dryRun` | `boolean` | `false` | 干运行模式:仅输出 AI 计划调用的工具列表,不执行真实操作 |
@@ -238,6 +242,7 @@ applyRouteSkill(location.pathname);
238
242
  | `memory` | `boolean` | `false` | 是否开启多轮对话记忆(跨 chat 调用保留历史消息) |
239
243
  | `autoSnapshot` | `boolean` | `true` | chat 前是否自动生成首轮页面快照并注入 system prompt |
240
244
  | `snapshotOptions` | `SnapshotOptions` | `{}` | 快照生成参数覆盖(深度、裁剪、剪枝、节点上限等) |
245
+ | `roundStabilityWait` | `RoundStabilityWaitOptions` | `{ enabled: true }` | 轮次后稳定等待配置(loading hidden + DOM stable);`loadingSelectors` 为“与默认值合并去重”,不会覆盖默认列表 |
241
246
 
242
247
  ### 参数详细说明
243
248
 
@@ -282,6 +287,8 @@ type AIClient = {
282
287
  | `openai` | `https://api.openai.com/v1` | `gpt-4o` / `gpt-4o-mini` | 标准 OpenAI 接口 |
283
288
  | `anthropic` | `https://api.anthropic.com` | `claude-sonnet-4-20250514` | Anthropic 原生接口 |
284
289
  | `deepseek` | `https://api.deepseek.com` | `deepseek-chat` | DeepSeek 接口 |
290
+ | `doubao` | `https://ark.cn-beijing.volces.com/api/v3` | `doubao-1.5-pro-32k` | 火山引擎 Ark(OpenAI 兼容) |
291
+ | `qwen` | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen-plus` | 阿里云百炼兼容模式(OpenAI 兼容) |
285
292
 
286
293
  #### `systemPrompt`(Prompt 注册与维护)
287
294
 
@@ -702,7 +709,7 @@ AutoPilot 内置 5 个工具,覆盖浏览器交互的核心能力。所有工
702
709
  | `attribute` | `string` | get_attr/set_attr | 属性名称 |
703
710
  | `className` | `string` | add_class/remove_class | CSS 类名(旧参数名,已被 `value` 兼容) |
704
711
  | `clickCount` | `number` | click | 点击次数(默认 1,双击传 2,三击传 3) |
705
- | `waitMs` | `number` | 所有动作 | 等待元素出现的超时时间(毫秒,默认 2000) |
712
+ | `waitMs` | `number` | 所有动作 | 等待元素出现的超时时间(毫秒,默认 1200) |
706
713
  | `waitSeconds` | `number` | 所有动作 | 等待超时(秒,`waitMs` 优先级更高) |
707
714
  | `force` | `boolean` | 所有动作 | 跳过 actionability 检查(默认 false) |
708
715
 
@@ -812,7 +819,7 @@ AutoPilot 内置 5 个工具,覆盖浏览器交互的核心能力。所有工
812
819
  | `selector` | `string` | wait_for_selector/hidden | 目标元素选择器 |
813
820
  | `state` | `string` | wait_for_selector | 目标状态:`attached`/`visible`/`hidden`/`detached` |
814
821
  | `text` | `string` | wait_for_text | 要等待出现的文本内容 |
815
- | `timeout` | `number` | 所有动作 | 超时时间(毫秒,默认 10000) |
822
+ | `timeout` | `number` | 所有动作 | 超时时间(毫秒,默认 6000) |
816
823
  | `quietMs` | `number` | wait_for_stable | DOM 静默窗口时长(毫秒,默认 300) |
817
824
 
818
825
  **动作详解:**
@@ -1034,6 +1041,8 @@ AI 每一轮不是“凭记忆猜页面”,而是基于最新快照选择可
1034
1041
  例子:
1035
1042
  - 可同轮:同时填写两个已可见输入框
1036
1043
  - 不可同轮:点击“打开弹窗”后立即填写弹窗字段(应等下一轮新快照)
1044
+ - 当前实现:若本轮出现潜在 DOM 变化动作,轮次结束会自动执行双重等待(先 loading hidden,再 DOM quiet window),默认 `quietMs=200`、`timeoutMs=4000`。
1045
+ - `loadingSelectors` 默认内置 AntD / Element Plus / BK / TDesign(TD)及通用加载态选择器;用户自定义会在默认列表基础上追加并去重,不会覆盖默认值。
1037
1046
 
1038
1047
  ---
1039
1048
 
@@ -1083,12 +1092,13 @@ loop 对本轮返回做以下处理:
1083
1092
  1. 执行工具调用批次
1084
1093
  2. 拦截 `page_info.*`(在 loop 内视为冗余,不让其成为主流程)
1085
1094
  3. 处理恢复(元素找不到时自动刷新快照)
1086
- 4. 刷新快照进入下一轮
1087
- 5. 更新下一轮任务文本:
1095
+ 4. 若本轮存在潜在 DOM 变化动作:执行轮次后稳定等待(loading hidden + DOM stable)
1096
+ 5. 刷新快照进入下一轮
1097
+ 6. 更新下一轮任务文本:
1088
1098
  - 优先使用 `REMAINING`
1089
1099
  - 若缺失 `REMAINING` 且本轮有执行动作:按线性任务剔除做启发式推进(避免整段原任务重复)
1090
1100
  - 若缺失 `REMAINING` 且本轮无执行进展:保持当前任务不推进(按协议回退)
1091
- 6. 若“remaining 未完成 + 无工具调用”:
1101
+ 7. 若“remaining 未完成 + 无工具调用”:
1092
1102
  - 不直接结束
1093
1103
  - 下一轮注入 `Protocol violation` 强约束提示,要求“要么给可执行工具调用,要么严格 `REMAINING: DONE`”
1094
1104
 
@@ -1099,12 +1109,12 @@ loop 对本轮返回做以下处理:
1099
1109
  1. 收集失败工具调用(name/input)及失败原因
1100
1110
  2. 将“失败工具集合 + 最新快照 + 当前任务”一起发给模型重试
1101
1111
  3. 在消息中标注重试次数:`attempt x/y`
1102
- 4. 若仍未命中,默认 `await 2000ms` 后刷新快照再重试
1112
+ 4. 若仍未命中,默认 `await 1000ms` 后刷新快照再重试
1103
1113
  5. 超过最大尝试次数后退出重试流,交由模型给出剩余任务或结束
1104
1114
 
1105
1115
  默认参数:
1106
1116
  - `DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2`
1107
- - `DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2000`
1117
+ - `DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 1000`
1108
1118
 
1109
1119
  ### 4) 停机条件
1110
1120
 
@@ -1429,7 +1439,7 @@ agent.registerTool({
1429
1439
  - `agent-loop`:轮次编排、停机判定、恢复/重试、指标汇总
1430
1440
  - `system-prompt`:系统规则模板
1431
1441
  - `tool-registry`:工具注册/分发/错误兜底
1432
- - `ai-client`:多 provider 协议适配(OpenAI/Copilot/Anthropic/DeepSeek)
1442
+ - `ai-client`:多 provider 协议适配(OpenAI/Copilot/Anthropic/DeepSeek/Doubao/Qwen)
1433
1443
  - `web`(浏览器实现):
1434
1444
  - `WebAgent`:入口编排、记忆、autoSnapshot、callbacks
1435
1445
  - `tools`:DOM/导航/页面信息/等待/evaluate
@@ -1523,7 +1533,7 @@ agent.registerTool({
1523
1533
  | 冗余 page_info 拦截 | `page_info.snapshot/query_all/get_url/get_title/get_viewport` | - | 直接返回拦截结果,不执行真实调用 | `recovery.ts#checkRedundantSnapshot` |
1524
1534
  | 连续 snapshot 防抖 | 连续 page_info.snapshot | 阈值=2 | 标记 `REDUNDANT_SNAPSHOT` | `recovery.ts#applySnapshotDebounce` |
1525
1535
  | 元素未找到自动恢复 | `dom` 且结果为 element not found | `DEFAULT_ACTION_RECOVERY_ROUNDS=2`,`DEFAULT_RECOVERY_WAIT_MS=100` | 等待 -> 刷新快照 -> 返回 recovery 结果 | `recovery.ts#handleElementRecovery` |
1526
- | Not-found 重试对话流 | 本轮有 not-found 失败任务 | `DEFAULT_NOT_FOUND_RETRY_ROUNDS=2`,`DEFAULT_NOT_FOUND_RETRY_WAIT_MS=2000` | 注入失败任务上下文 + attempt x/y,必要时等待后重试 | `index.ts` 主循环 |
1536
+ | Not-found 重试对话流 | 本轮有 not-found 失败任务 | `DEFAULT_NOT_FOUND_RETRY_ROUNDS=2`,`DEFAULT_NOT_FOUND_RETRY_WAIT_MS=1000` | 注入失败任务上下文 + attempt x/y,必要时等待后重试 | `index.ts` 主循环 |
1527
1537
  | 导航后上下文刷新 | `navigate` 成功且动作为 goto/back/forward/reload | - | 立即刷新快照 | `recovery.ts#handleNavigationUrlChange` |
1528
1538
  | 空转检测 | 连续只读轮次 | 连续 2 轮 | 返回 -1 终止 | `recovery.ts#detectIdleLoop` |
1529
1539
 
@@ -1776,27 +1786,38 @@ agent.registerTool({
1776
1786
  - `DEFAULT_RECOVERY_WAIT_MS = 100`
1777
1787
  - `DEFAULT_ACTION_RECOVERY_ROUNDS = 2`
1778
1788
  - `DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2`
1779
- - `DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2000`
1789
+ - `DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 1000`
1780
1790
 
1781
1791
  `src/web/tools/wait-tool.ts`:
1782
1792
 
1783
- - `DEFAULT_TIMEOUT = 10000`
1793
+ - `DEFAULT_TIMEOUT = 6000`
1784
1794
 
1785
1795
  ---
1786
1796
 
1787
1797
  ### 10. 文档与实现一致性清单(维护者必看)
1788
1798
 
1799
+ Agent Loop 机制权威文档:`src/core/agent-loop/LOOP_MECHANISM.md`。
1800
+
1789
1801
  任何涉及“渐进式任务消费”的改动,至少同步以下文件:
1790
1802
 
1791
1803
  1. `src/core/agent-loop/messages.ts`(输入语义)
1792
1804
  2. `src/core/agent-loop/index.ts`(停机判定与推进逻辑)
1793
- 3. `README.md`(机制说明)
1805
+ 3. `src/core/agent-loop/LOOP_MECHANISM.md`(机制权威说明)
1806
+ 4. `README.md`(机制说明)
1794
1807
 
1795
1808
  任何涉及“找不到元素重试流”的改动,至少同步:
1796
1809
 
1797
1810
  1. `src/core/agent-loop/index.ts`
1798
1811
  2. `src/core/agent-loop/recovery.ts`
1799
- 3. `README.md`
1812
+ 3. `src/core/agent-loop/LOOP_MECHANISM.md`
1813
+ 4. `README.md`
1814
+
1815
+ 任何涉及 provider 新增/调整的改动,至少同步:
1816
+
1817
+ 1. `src/core/ai-client/index.ts`(provider 路由)
1818
+ 2. `src/core/ai-client/constants.ts`(默认端点)
1819
+ 3. `src/web/index.ts`(WebAgentOptions 注释/提示)
1820
+ 4. `README.md`(配置示例与支持矩阵)
1800
1821
 
1801
1822
  这样才能保证“实现、提示词、文档”三者一致,不出现行为漂移。
1802
1823
 
@@ -1848,4 +1869,4 @@ MIT
1848
1869
  4. **RefStore - 生命周期图**:从 `chat()` 创建到 `clear()` 释放的完整流程
1849
1870
  5. **AI Client - 自定义接入**:`BaseAIClient` 和纯对象两种方式的代码示例
1850
1871
  6. **快照格式 - 生成管线**:从 `document.body` 到最终文本的 10 步管线流程
1851
- 7. **错误处理 - 恢复常量**:`DEFAULT_ACTION_RECOVERY_ROUNDS=2` / `DEFAULT_NOT_FOUND_RETRY_ROUNDS=2` / `DEFAULT_NOT_FOUND_RETRY_WAIT_MS=2000` 等关键参数说明
1872
+ 7. **错误处理 - 恢复常量**:`DEFAULT_ACTION_RECOVERY_ROUNDS=2` / `DEFAULT_NOT_FOUND_RETRY_ROUNDS=2` / `DEFAULT_NOT_FOUND_RETRY_WAIT_MS=1000` 等关键参数说明
package/dist/index.d.mts CHANGED
@@ -197,6 +197,17 @@ type SnapshotOptions = {
197
197
  maxNodes?: number; /** 每个父节点最多输出的子元素数(默认 25),超出部分会折叠。 */
198
198
  maxChildren?: number; /** 文本截断长度(默认 40)。 */
199
199
  maxTextLength?: number;
200
+ /**
201
+ * 是否对“选项列表”容器放宽子节点截断(默认 false)。
202
+ * 典型场景:时间选择器/下拉选项列表,避免关键选项被 `...children omitted` 折叠。
203
+ */
204
+ expandOptionLists?: boolean;
205
+ /**
206
+ * 仅对指定 hash ref 节点放宽子节点截断(优先级高于默认 maxChildren)。
207
+ * 例如:[#abc123, #def456],用于 AI 在看到 children omitted 后定向请求放宽。
208
+ */
209
+ expandChildrenRefs?: string[]; /** 对 expandChildrenRefs 节点生效的子节点上限(默认 120)。 */
210
+ expandedChildrenLimit?: number;
200
211
  };
201
212
  /**
202
213
  * 生成页面 DOM 快照 — 将 DOM 树转为 AI 可理解的文本描述。
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/core/tool-registry.ts","../src/core/types.ts","../src/core/agent-loop/types.ts","../src/web/ref-store.ts","../src/web/tools/page-info-tool.ts","../src/web/tools/dom-tool.ts","../src/web/tools/navigate-tool.ts","../src/web/tools/wait-tool.ts","../src/web/tools/evaluate-tool.ts","../src/web/messaging.ts","../src/web/index.ts"],"mappings":";;;;;;KA0BY,cAAA;EAuBV,qCArBA,OAAA,WAAkB,MAAA,mBAqBR;EAnBV,OAAA,GAAU,MAAA;AAAA;;;;;;ACfZ;;;KD0BY,cAAA;ECxBV,4CD0BA,IAAA,UCtBA;EDwBA,WAAA,UCxBK;ED0BL,MAAA,EAAQ,OAAA,ECpBW;EDsBnB,OAAA,GAAU,MAAA,EAAQ,MAAA,sBAA4B,OAAA,CAAQ,cAAA;AAAA;;;;KClC5C,UAAA;ED0BA,0BCxBV,EAAA;EAEA,IAAA,UD8BkB;EC5BlB,KAAA;AAAA;;KAMU,SAAA;EACV,IAAA,4CDiBA;ECfA,OAAA,WAAkB,KAAA;IAAQ,UAAA;IAAoB,MAAA;EAAA,IDmBpC;ECjBV,SAAA,GAAY,UAAA;AAAA;;KAMF,cAAA;iBAEV,IAAA;EAEA,SAAA,GAAY,UAAA,IA3BQ;EA6BpB,KAAA;IAAU,WAAA;IAAqB,YAAA;EAAA;AAAA;;;AAjBjC;;;KA2BY,QAAA;EACV,IAAA,CAAK,MAAA;IACH,YAAA;IACA,QAAA,EAAU,SAAA;IACV,KAAA,GAAQ,cAAA;EAAA,IACN,OAAA,CAAQ,cAAA;AAAA;;;KClDF,gBAAA;EACV,UAAA;EACA,cAAA;EACA,mBAAA;EACA,eAAA;EACA,eAAA;EACA,aAAA;EACA,uBAAA;EACA,iBAAA;EACA,kBAAA;EACA,eAAA;EACA,eAAA;EACA,WAAA;EACA,YAAA;AAAA;;KAMU,kBAAA;EFmBF,mBEjBR,MAAA,IAAU,IAAA,mBFmBQ;EEjBlB,UAAA,IAAc,IAAA,UAAc,KAAA,oBFiBkB;EEf9C,YAAA,IAAgB,IAAA,UAAc,MAAA,EAAQ,cAAA,WFe8B;EEbpE,OAAA,IAAW,KAAA;;;;ADrBb;;;;;EC8BE,wBAAA,IAA4B,MAAA,oBDxB5B;EC0BA,SAAA,IAAa,OAAA,EAAS,gBAAA;AAAA;AAAA,KA0BZ,eAAA;ED7BA,iBC+BV,KAAA,UD/B2C;ECiC3C,SAAA,EAAW,KAAA;IAAQ,IAAA;IAAc,KAAA;IAAgB,MAAA,EAAQ,cAAA;EAAA,IDnB/C;ECqBV,QAAA,EAAU,SAAA,IDpBN;ECsBJ,OAAA,EAAS,gBAAA;AAAA;;;;;;AFvDX;;;;;;;;;;AAeA;;;;;;;;;;;;;;cGAa,QAAA;EAAA,QACH,GAAA;EHOsC;EAAA,QGLtC,MAAA;EHK4D;;;;cGCxD,GAAA;EFnCF;;;;;;;EE8CV,GAAA,CAAI,EAAA,EAAI,OAAA,EAAS,IAAA;EFxCZ;AAMP;;;EEkDE,GAAA,CAAI,EAAA,WAAa,OAAA;EFjDjB;EEsDA,GAAA,CAAI,EAAA;EFpDc;EEyDlB,KAAA,CAAA;EFzD8C;;;;;AAQhD;;;EE6DE,KAAA,CAAM,GAAA;EF3DN;EAAA,IEmEI,IAAA,CAAA;AAAA;;;;KCvFM,eAAA;EJMc,mBIJxB,QAAA;EJQgB;;;;;EIFhB,YAAA;EJEgB;AAWlB;;;;EIPE,WAAA;EJesD;;;;;EITtD,
QAAA,GAAW,QAAA,EJOX;EILA,QAAA,WJOA;EILA,WAAA,WJKU;EIHV,aAAA;AAAA;;;;;;AH/BF;;;;;;;;;AAYA;;;;;;iBG0CgB,gBAAA,CACd,IAAA,GAAM,OAAA,EACN,OAAA,GAAS,eAAA;AAAA,iBAqVK,kBAAA,CAAA,GAAsB,cAAA;;;iBCqHtB,aAAA,CAAA,GAAiB,cAAA;;;iBCvfjB,kBAAA,CAAA,GAAsB,cAAA;;;iBCgMtB,cAAA,CAAA,GAAkB,cAAA;;;iBCxJlB,kBAAA,CAAA,GAAsB,cAAA;;;;;;ARxCtC;;;;;;;;;;AAeA;;;;;;;;;;;;;;KSXY,eAAA;EACV,IAAA;EACA,QAAA;EACA,MAAA,EAAQ,MAAA;EACR,MAAA;AAAA;;KAIU,gBAAA;EACV,IAAA;EACA,MAAA;EACA,MAAA;IACE,OAAA,WAAkB,MAAA;IAClB,OAAA,GAAU,MAAA;EAAA;AAAA;;;;ARhBd;;;;;iBQ8BgB,mBAAA,CAAA,IAEZ,QAAA,UACA,MAAA,EAAQ,MAAA,sBACP,OAAA;EAAU,OAAA,WAAkB,MAAA;EAAyB,OAAA,GAAU,MAAA;AAAA;;KAgCxD,eAAA,GAAkB,GAAA,UAE3B,MAAA,EAAQ,MAAA,sBAA4B,OAAA;EACnC,OAAA,WAAkB,MAAA;EAClB,OAAA,GAAU,MAAA;AAAA;;;;;;;;;iBAYE,mBAAA,CAAoB,SAAA,EAAW,eAAA;;;;KC1DnC,iBAAA,GAAoB,kBAAA;oBAE9B,UAAA,IAAc,QAAA;AAAA;AAAA,KAKJ,eAAA;ET3CU;;;;;;AAYtB;;;;;ES2CE,MAAA,GAAS,QAAA,ETxCS;ES0ClB,KAAA,WT1C8C;ES4C9C,QAAA,WT1CY;ES4CZ,KAAA,WT5CsB;ES8CtB,OAAA,WTxCwB;ES0CxB,MAAA,YTtCsB;ESwCtB,MAAA;ETxCA;;;;;ES8CA,YAAA,YAAwB,MAAA,kBT5CmB;ES8C3C,SAAA,WTpCkB;ESsClB,MAAA,YTnCY;ESqCZ,YAAA,YTnCY;ESqCZ,eAAA,GAAkB,eAAA;AAAA;AAAA,cAKP,QAAA;ET9CX;EAAA,wBSgDwB,yBAAA;ET9CtB;EAAA,wBSgDsB,kBAAA;ET/CtB;EAAA,QSkDM,MAAA;EAAA,QACA,KAAA;EAAA,QACA,QAAA;EAAA,QACA,KAAA;EAAA,QACA,OAAA;EAAA,QACA,MAAA;EAAA,QACA,MAAA;EAAA,QACA,SAAA;;UAEA,oBAAA;ER5GkB;EAAA,QQ8GlB,kBAAA;ER9GkB;EAAA,QQiHlB,MAAA;ER/GR;EAAA,QQiHQ,OAAA;ER/GR;EAAA,QQiHQ,YAAA;ER/GR;EAAA,QQiHQ,eAAA;ER/GR;EAAA,QQkHQ,QAAA;ERhHR;EQmHA,SAAA,EAAW,iBAAA;cAEC,OAAA,EAAS,eAAA;ERlHrB;EQyIA,aAAA,CAAA;ERzIY;EQsJZ,YAAA,CAAa,IAAA,EAAM,cAAA;ERhJS;;;;;EQyJ5B,UAAA,CAAW,IAAA;ERrJG;EQ2Jd,OAAA,CAAQ,IAAA;ERzJR;EQ8JA,YAAA,CAAA;ER9JsC;;;;EQsKtC,gBAAA,CAAA;ER3J4B;EQuK5B,QAAA,CAAA,GAAY,cAAA;ERrKU;EQ4KtB,QAAA,CAAS,KAAA;ER5K6B;;AA0BxC;;;;EQ4JE,SAAA,CAAU,MAAA,EAAQ,QAAA;ERtJR;EQ2JV,WAAA,CAAY,QAAA;ERzJa;EQ8JzB,QAAA,CAAS,KAAA;ERpKT;EQyKA,SAAA,CAAU,OAAA;ERvKC;EQ4KX,SAAA,CAAA;ER5KiC;EQiLjC,SAAA,CAAU,OAAA;ERjL+C;;;;;EQ0LzD,eAAA,CAAgB,MAAA;EAChB,eAAA,CAAgB,GAAA,UAAa,MAAA;;EAe7B,gBAAA,C
AAiB,OAAA,EAAS,MAAA;;EAO1B,kBAAA,CAAmB,GAAA;EPrPA;EO0PnB,oBAAA,CAAqB,GAAA;EPtNG;EO+NxB,gBAAA,CAAA,GAAoB,MAAA;EPhQZ;EOqQR,kBAAA,CAAA;EP/PY;EOoQZ,SAAA,CAAU,OAAA;EPzPF;EO+PR,SAAA,CAAA;EP/PiB;EOoQjB,eAAA,CAAgB,OAAA;EPpPZ;EOyPJ,eAAA,CAAA;EPpPA;EOyPA,kBAAA,CAAmB,OAAA,EAAS,eAAA;EPpP5B;EOyPA,kBAAA,CAAA,GAAsB,eAAA;EP7OhB;EOkPN,YAAA,CAAA;EP1OQ;;;;;ACvFV;;;;EMgVQ,IAAA,CAAK,OAAA,WAAkB,OAAA,CAAQ,eAAA;ENxUrC;;;;;EAAA,QMgaQ,mBAAA;AAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/core/tool-registry.ts","../src/core/types.ts","../src/core/agent-loop/types.ts","../src/web/ref-store.ts","../src/web/tools/page-info-tool.ts","../src/web/tools/dom-tool.ts","../src/web/tools/navigate-tool.ts","../src/web/tools/wait-tool.ts","../src/web/tools/evaluate-tool.ts","../src/web/messaging.ts","../src/web/index.ts"],"mappings":";;;;;;KA0BY,cAAA;EAuBV,qCArBA,OAAA,WAAkB,MAAA,mBAqBR;EAnBV,OAAA,GAAU,MAAA;AAAA;;;;;;ACfZ;;;KD0BY,cAAA;ECxBV,4CD0BA,IAAA,UCtBA;EDwBA,WAAA,UCxBK;ED0BL,MAAA,EAAQ,OAAA,ECpBW;EDsBnB,OAAA,GAAU,MAAA,EAAQ,MAAA,sBAA4B,OAAA,CAAQ,cAAA;AAAA;;;;KClC5C,UAAA;ED0BA,0BCxBV,EAAA;EAEA,IAAA,UD8BkB;EC5BlB,KAAA;AAAA;;KAMU,SAAA;EACV,IAAA,4CDiBA;ECfA,OAAA,WAAkB,KAAA;IAAQ,UAAA;IAAoB,MAAA;EAAA,IDmBpC;ECjBV,SAAA,GAAY,UAAA;AAAA;;KAMF,cAAA;iBAEV,IAAA;EAEA,SAAA,GAAY,UAAA,IA3BQ;EA6BpB,KAAA;IAAU,WAAA;IAAqB,YAAA;EAAA;AAAA;;;AAjBjC;;;KA2BY,QAAA;EACV,IAAA,CAAK,MAAA;IACH,YAAA;IACA,QAAA,EAAU,SAAA;IACV,KAAA,GAAQ,cAAA;EAAA,IACN,OAAA,CAAQ,cAAA;AAAA;;;KClDF,gBAAA;EACV,UAAA;EACA,cAAA;EACA,mBAAA;EACA,eAAA;EACA,eAAA;EACA,aAAA;EACA,uBAAA;EACA,iBAAA;EACA,kBAAA;EACA,eAAA;EACA,eAAA;EACA,WAAA;EACA,YAAA;AAAA;;KAMU,kBAAA;EFmBF,mBEjBR,MAAA,IAAU,IAAA,mBFmBQ;EEjBlB,UAAA,IAAc,IAAA,UAAc,KAAA,oBFiBkB;EEf9C,YAAA,IAAgB,IAAA,UAAc,MAAA,EAAQ,cAAA,WFe8B;EEbpE,OAAA,IAAW,KAAA;;;;ADrBb;;;;;EC8BE,wBAAA,IAA4B,MAAA,oBDxB5B;EC0BA,SAAA,IAAa,OAAA,EAAS,gBAAA;AAAA;AAAA,KA0BZ,eAAA;ED7BA,iBC+BV,KAAA,UD/B2C;ECiC3C,SAAA,EAAW,KAAA;IAAQ,IAAA;IAAc,KAAA;IAAgB,MAAA,EAAQ,cAAA;EAAA,IDnB/C;ECqBV,QAAA,EAAU,SAAA,IDpBN;ECsBJ,OAAA,EAAS,gBAAA;AAAA;;;;;;AFvDX;;;;;;;;;;AAeA;;;;;;;;;;;;;;cGAa,QAAA;EAAA,QACH,GAAA;EHOsC;EAAA,QGLtC,MAAA;EHK4D;;;;cGCxD,GAAA;EFnCF;;;;;;;EE8CV,GAAA,CAAI,EAAA,EAAI,OAAA,EAAS,IAAA;EFxCZ;AAMP;;;EEkDE,GAAA,CAAI,EAAA,WAAa,OAAA;EFjDjB;EEsDA,GAAA,CAAI,EAAA;EFpDc;EEyDlB,KAAA,CAAA;EFzD8C;;;;;AAQhD;;;EE6DE,KAAA,CAAM,GAAA;EF3DN;EAAA,IEmEI,IAAA,CAAA;AAAA;;;;KCvFM,eAAA;EJMc,mBIJxB,QAAA;EJQgB;;;;;EIFhB,YAAA;EJEgB;AAWlB;;;;EIPE,WAAA;EJesD;;;;;EITtD,
QAAA,GAAW,QAAA,EJOX;EILA,QAAA,WJOA;EILA,WAAA,WJKU;EIHV,aAAA;EJGsD;;;;EIEtD,iBAAA;;AHpCF;;;EGyCE,kBAAA,aHvCA;EGyCA,qBAAA;AAAA;;;AH/BF;;;;;;;;;;;;;AAWA;;;;;iBGgFgB,gBAAA,CACd,IAAA,GAAM,OAAA,EACN,OAAA,GAAS,eAAA;AAAA,iBA0YK,kBAAA,CAAA,GAAsB,cAAA;;;iBCetB,aAAA,CAAA,GAAiB,cAAA;;;iBCvfjB,kBAAA,CAAA,GAAsB,cAAA;;;iBCgMtB,cAAA,CAAA,GAAkB,cAAA;;;iBCxJlB,kBAAA,CAAA,GAAsB,cAAA;;;;;;ARxCtC;;;;;;;;;;AAeA;;;;;;;;;;;;;;KSXY,eAAA;EACV,IAAA;EACA,QAAA;EACA,MAAA,EAAQ,MAAA;EACR,MAAA;AAAA;;KAIU,gBAAA;EACV,IAAA;EACA,MAAA;EACA,MAAA;IACE,OAAA,WAAkB,MAAA;IAClB,OAAA,GAAU,MAAA;EAAA;AAAA;;;;ARhBd;;;;;iBQ8BgB,mBAAA,CAAA,IAEZ,QAAA,UACA,MAAA,EAAQ,MAAA,sBACP,OAAA;EAAU,OAAA,WAAkB,MAAA;EAAyB,OAAA,GAAU,MAAA;AAAA;;KAgCxD,eAAA,GAAkB,GAAA,UAE3B,MAAA,EAAQ,MAAA,sBAA4B,OAAA;EACnC,OAAA,WAAkB,MAAA;EAClB,OAAA,GAAU,MAAA;AAAA;;;;;;;;;iBAYE,mBAAA,CAAoB,SAAA,EAAW,eAAA;;;;KC1DnC,iBAAA,GAAoB,kBAAA;oBAE9B,UAAA,IAAc,QAAA;AAAA;AAAA,KAKJ,eAAA;ET3CU;;;;;;AAYtB;;;;;ES2CE,MAAA,GAAS,QAAA,ETxCS;ES0ClB,KAAA,WT1C8C;ES4C9C,QAAA,WT1CY;ES4CZ,KAAA,WT5CsB;ES8CtB,OAAA,WTxCwB;ES0CxB,MAAA,YTtCsB;ESwCtB,MAAA;ETxCA;;;;;ES8CA,YAAA,YAAwB,MAAA,kBT5CmB;ES8C3C,SAAA,WTpCkB;ESsClB,MAAA,YTnCY;ESqCZ,YAAA,YTnCY;ESqCZ,eAAA,GAAkB,eAAA;AAAA;AAAA,cAKP,QAAA;ET9CX;EAAA,wBSgDwB,yBAAA;ET9CtB;EAAA,wBSgDsB,kBAAA;ET/CtB;EAAA,QSkDM,MAAA;EAAA,QACA,KAAA;EAAA,QACA,QAAA;EAAA,QACA,KAAA;EAAA,QACA,OAAA;EAAA,QACA,MAAA;EAAA,QACA,MAAA;EAAA,QACA,SAAA;;UAEA,oBAAA;ER5GkB;EAAA,QQ8GlB,kBAAA;ER9GkB;EAAA,QQiHlB,MAAA;ER/GR;EAAA,QQiHQ,OAAA;ER/GR;EAAA,QQiHQ,YAAA;ER/GR;EAAA,QQiHQ,eAAA;ER/GR;EAAA,QQkHQ,QAAA;ERhHR;EQmHA,SAAA,EAAW,iBAAA;cAEC,OAAA,EAAS,eAAA;ERlHrB;EQyIA,aAAA,CAAA;ERzIY;EQsJZ,YAAA,CAAa,IAAA,EAAM,cAAA;ERhJS;;;;;EQyJ5B,UAAA,CAAW,IAAA;ERrJG;EQ2Jd,OAAA,CAAQ,IAAA;ERzJR;EQ8JA,YAAA,CAAA;ER9JsC;;;;EQsKtC,gBAAA,CAAA;ER3J4B;EQuK5B,QAAA,CAAA,GAAY,cAAA;ERrKU;EQ4KtB,QAAA,CAAS,KAAA;ER5K6B;;AA0BxC;;;;EQ4JE,SAAA,CAAU,MAAA,EAAQ,QAAA;ERtJR;EQ2JV,WAAA,CAAY,QAAA;ERzJa;EQ8JzB,QAAA,CAAS,KAAA;ERpKT;EQyKA,SAAA,CAAU,OAAA;ERvKC;EQ4KX,SAAA,CAAA;ER5KiC;EQiLjC,SAAA,CAAU,OAAA;ERjL+C;;;;;EQ0LzD,eAA
A,CAAgB,MAAA;EAChB,eAAA,CAAgB,GAAA,UAAa,MAAA;;EAe7B,gBAAA,CAAiB,OAAA,EAAS,MAAA;;EAO1B,kBAAA,CAAmB,GAAA;EPrPA;EO0PnB,oBAAA,CAAqB,GAAA;EPtNG;EO+NxB,gBAAA,CAAA,GAAoB,MAAA;EPhQZ;EOqQR,kBAAA,CAAA;EP/PY;EOoQZ,SAAA,CAAU,OAAA;EPzPF;EO+PR,SAAA,CAAA;EP/PiB;EOoQjB,eAAA,CAAgB,OAAA;EPpPZ;EOyPJ,eAAA,CAAA;EPpPA;EOyPA,kBAAA,CAAmB,OAAA,EAAS,eAAA;EPpP5B;EOyPA,kBAAA,CAAA,GAAsB,eAAA;EP7OhB;EOkPN,YAAA,CAAA;EP1OQ;;;;;ACvFV;;;;EMgVQ,IAAA,CAAK,OAAA,WAAkB,OAAA,CAAQ,eAAA;ENxUrC;;;;;EAAA,QMgaQ,mBAAA;AAAA"}
package/dist/index.mjs CHANGED
@@ -111,7 +111,10 @@ async function readPageSnapshot(registry, options) {
111
111
  pruneLayout: options?.pruneLayout ?? true,
112
112
  maxNodes: options?.maxNodes ?? 500,
113
113
  maxChildren: options?.maxChildren ?? 30,
114
- maxTextLength: options?.maxTextLength ?? 40
114
+ maxTextLength: options?.maxTextLength ?? 40,
115
+ expandOptionLists: options?.expandOptionLists,
116
+ expandChildrenRefs: options?.expandChildrenRefs,
117
+ expandedChildrenLimit: options?.expandedChildrenLimit
115
118
  })).content);
116
119
  }
117
120
  /**
@@ -216,7 +219,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
216
219
  activeInstruction
217
220
  ];
218
221
  if (currentUrl) parts.push("", `URL: ${currentUrl}`);
219
- if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.", "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
222
+ if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.", "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", "If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.", "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
220
223
  if (protocolViolationHint) parts.push("", protocolViolationHint);
221
224
  messages.push({
222
225
  role: "user",
@@ -252,6 +255,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
252
255
  "If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
253
256
  "Do NOT call page_info (get_url/get_title/query_all/snapshot).",
254
257
  "For dropdown/select fields, use dom with action=select_option (or fill on a select).",
258
+ "If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.",
255
259
  "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.",
256
260
  "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.",
257
261
  "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.",
@@ -445,6 +449,7 @@ async function executeAgentLoop(params) {
445
449
  let consecutiveSamePlannedBatch = 0;
446
450
  let lastRoundHadError = false;
447
451
  let protocolViolationHint;
452
+ const snapshotExpandRefIds = /* @__PURE__ */ new Set();
448
453
  let recoveryCount = 0;
449
454
  let redundantInterceptCount = 0;
450
455
  let pendingNotFoundRetry;
@@ -470,9 +475,37 @@ async function executeAgentLoop(params) {
470
475
  * Does exactly two things: read latest snapshot + update metrics.
471
476
  */
472
477
  const refreshSnapshot = async () => {
473
- pageContext.latestSnapshot = await readPageSnapshot(registry);
478
+ pageContext.latestSnapshot = await readPageSnapshot(registry, snapshotExpandRefIds.size > 0 ? {
479
+ expandChildrenRefs: Array.from(snapshotExpandRefIds),
480
+ expandedChildrenLimit: 120
481
+ } : void 0);
474
482
  recordSnapshotStats(pageContext.latestSnapshot);
475
483
  };
484
+ /**
485
+ * 解析模型文本中的快照放宽指令(中)/ Parse snapshot expansion hint from model text (EN).
486
+ *
487
+ * 约定:
488
+ * SNAPSHOT_HINT: EXPAND_CHILDREN #ref1 #ref2
489
+ */
490
+ const parseSnapshotExpandHints = (text) => {
491
+ if (!text) return [];
492
+ const refs = [];
493
+ const regex = /^\s*SNAPSHOT_HINT\s*:\s*EXPAND_CHILDREN\s+(.+)$/gim;
494
+ let match;
495
+ while ((match = regex.exec(text)) !== null) {
496
+ const tokens = (match[1] ?? "").match(/#[A-Za-z0-9_-]+/g) ?? [];
497
+ for (const token of tokens) refs.push(token.replace(/^#/, ""));
498
+ }
499
+ return refs;
500
+ };
501
+ /** 从工具输入提取 hash selector(如 #1rv01x),用于定向快照放宽。 */
502
+ const extractHashSelectorRef = (toolInput) => {
503
+ if (!toolInput || typeof toolInput !== "object") return null;
504
+ const selector = toolInput.selector;
505
+ if (typeof selector !== "string") return null;
506
+ const m = selector.trim().match(/^#([A-Za-z0-9_-]+)$/);
507
+ return m ? m[1] : null;
508
+ };
476
509
  if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
477
510
  /**
478
511
  * 追加工具轨迹(中)/ Append tool trace entry (EN).
@@ -617,6 +650,8 @@ async function executeAgentLoop(params) {
617
650
  inputTokens += response.usage?.inputTokens ?? 0;
618
651
  outputTokens += response.usage?.outputTokens ?? 0;
619
652
  const parsedInstructionState = deriveNextInstruction(response.text, remainingInstruction);
653
+ const snapshotHintRefs = parseSnapshotExpandHints(response.text);
654
+ for (const ref of snapshotHintRefs.slice(0, 8)) snapshotExpandRefIds.add(ref);
620
655
  if (!response.toolCalls || response.toolCalls.length === 0) {
621
656
  if (pendingNotFoundRetry) {
622
657
  const unresolvedHint = response.text?.toLowerCase() ?? "";
@@ -687,6 +722,10 @@ async function executeAgentLoop(params) {
687
722
  const executedTaskCalls = [];
688
723
  const roundMissingTasks = [];
689
724
  for (const tc of response.toolCalls) {
725
+ if (tc.name === "dom" && getToolAction(tc.input) === "scroll") {
726
+ const ref = extractHashSelectorRef(tc.input);
727
+ if (ref) snapshotExpandRefIds.add(ref);
728
+ }
690
729
  const redundant = checkRedundantSnapshot(tc.name, tc.input, pageContext.latestSnapshot, round);
691
730
  if (redundant) {
692
731
  appendToolTrace(round, tc.name, tc.input, redundant);
@@ -1397,6 +1436,8 @@ function buildSystemPrompt(params = {}) {
1397
1436
  "- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
1398
1437
  "- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
1399
1438
  "- For dropdown/select, use dom action=select_option (or fill on select).",
1439
+ "- If a required list shows `... (N children omitted)` under a specific container, request focused expansion by outputting `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>`.",
1440
+ "- After outputting snapshot expansion hint, wait for the next refreshed snapshot before further scrolling/clicking on that list.",
1400
1441
  "- Verification whitelist: do NOT use get_text/get_attr to verify input/select values unless the user explicitly asks for verification.",
1401
1442
  "- Stop rule: when the requested state is achieved, stop calling tools. If verification is requested, verify once and then return REMAINING: DONE (no repeated get_text/get_attr on the same target).",
1402
1443
  "- Do NOT interact with AutoPilot UI unless user explicitly asks.",
@@ -2013,7 +2054,7 @@ function createDomTool() {
2013
2054
  name: "dom",
2014
2055
  description: [
2015
2056
  "Perform DOM operations on the current page.",
2016
- "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
2057
+ "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, scroll, press, get_text, get_attr, set_attr, add_class, remove_class.",
2017
2058
  "Input/Select rule: before each fill/type/select_option, click or focus the same target immediately in the same round.",
2018
2059
  "For multiple fields, use alternating pairs in one batch: focus/click A -> fill/type A -> focus/click B -> fill/type B.",
2019
2060
  "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector.",
@@ -2022,10 +2063,11 @@ function createDomTool() {
2022
2063
  "Ordinal/index rule: treat visual order as 1-based when the instruction says 'the Nth item' (e.g. 4th star = 4th visible icon from left to right), and avoid off-by-one mistakes.",
2023
2064
  "Disambiguation rule: distinguish descriptive text/labels from actionable options. Do not click nearby label/help text; click the actual interactive option/control item (icon/button/option) that changes state.",
2024
2065
  "Unknown/complex components: if a container element (e.g. role=slider, rating, custom widget) has multiple child icons/items in the snapshot but you don't know how to operate it directly, try clicking the appropriate child element instead. For example, a rating component with 5 star icon children — click the 4th icon child to set 4 stars. A slider with a runway — clicking the runway at the right position may work. Always prefer interacting with visible children when the parent container doesn't respond to fill/click as expected.",
2025
- "fill supports role=slider elements: use fill with a numeric value on a role=slider container (rating/slider) to set its value programmatically."
2066
+ "fill supports role=slider elements: use fill with a numeric value on a role=slider container (rating/slider) to set its value programmatically.",
2067
+ "For wheel/virtualized pickers where target option is not visible yet, use scroll on the picker column first, then click/select the newly visible option. scroll supports steps for repeated scrolling in one call."
2026
2068
  ].join(" "),
2027
2069
  schema: Type.Object({
2028
- action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class." }),
2070
+ action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | scroll | press | get_text | get_attr | set_attr | add_class | remove_class." }),
2029
2071
  selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
2030
2072
  value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions." })),
2031
2073
  key: Type.Optional(Type.String({ description: "Key for press action. Supports combo: 'Enter', 'Control+a', 'Shift+Enter', 'Meta+c'" })),
@@ -2034,6 +2076,9 @@ function createDomTool() {
2034
2076
  attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr" })),
2035
2077
  className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
2036
2078
  clickCount: Type.Optional(Type.Number({ description: "Click count (default 1). 2 = double-click, 3 = triple-click." })),
2079
+ deltaY: Type.Optional(Type.Number({ description: "Vertical scroll delta for scroll action. Positive = down, negative = up." })),
2080
+ deltaX: Type.Optional(Type.Number({ description: "Horizontal scroll delta for scroll action." })),
2081
+ steps: Type.Optional(Type.Number({ description: "Repeat count for scroll action (default 1, max 20)." })),
2037
2082
  waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 2000)." })),
2038
2083
  waitSeconds: Type.Optional(Type.Number({ description: "Wait timeout in seconds (fallback for waitMs)." })),
2039
2084
  force: Type.Optional(Type.Boolean({ description: "Skip actionability checks (default false)." }))
@@ -2360,6 +2405,37 @@ function createDomTool() {
2360
2405
  if (target instanceof HTMLElement) dispatchHoverEvents(target);
2361
2406
  return { content: `已悬停 ${describeElement(target)}` };
2362
2407
  }
2408
+ case "scroll": {
2409
+ const target = retarget(el, "none");
2410
+ const deltaY = typeof params.deltaY === "number" ? params.deltaY : typeof params.value === "string" && !Number.isNaN(Number(params.value)) ? Number(params.value) : 180;
2411
+ const deltaX = typeof params.deltaX === "number" ? params.deltaX : 0;
2412
+ const rawSteps = typeof params.steps === "number" ? Math.floor(params.steps) : 1;
2413
+ const steps = Math.min(20, Math.max(1, rawSteps));
2414
+ if (target instanceof HTMLElement) {
2415
+ scrollIntoViewIfNeeded(target);
2416
+ for (let i = 0; i < steps; i++) {
2417
+ target.scrollBy({
2418
+ top: deltaY,
2419
+ left: deltaX,
2420
+ behavior: "auto"
2421
+ });
2422
+ target.dispatchEvent(new WheelEvent("wheel", {
2423
+ bubbles: true,
2424
+ cancelable: true,
2425
+ deltaY,
2426
+ deltaX
2427
+ }));
2428
+ }
2429
+ return { content: `已滚动 ${describeElement(target)}: deltaY=${deltaY}, deltaX=${deltaX}, steps=${steps}` };
2430
+ }
2431
+ for (let i = 0; i < steps; i++) target.dispatchEvent(new WheelEvent("wheel", {
2432
+ bubbles: true,
2433
+ cancelable: true,
2434
+ deltaY,
2435
+ deltaX
2436
+ }));
2437
+ return { content: `已滚动 ${describeElement(target)}: deltaY=${deltaY}, deltaX=${deltaX}, steps=${steps}` };
2438
+ }
2363
2439
  case "press": {
2364
2440
  const key = params.key || params.value;
2365
2441
  if (!key) return { content: "缺少 key 参数(如 Enter, Escape, Tab, Control+a)" };
@@ -2447,6 +2523,31 @@ function createDomTool() {
2447
2523
  * snapshot — 获取页面 DOM 结构快照(AI 可读的文本描述)
2448
2524
  * query_all — 查询所有匹配选择器的元素,返回摘要信息
2449
2525
  */
2526
+ /** Maximum number of characters kept for an attribute value in a snapshot; longer values are truncated. */
2527
+ const MAX_SNAPSHOT_ATTR_VALUE_LENGTH = 120;
2528
+ /** Child-count limit applied when an option list is expanded (still a fixed cap, so the snapshot cannot grow without bound). */
2529
+ const MAX_EXPANDED_LIST_CHILDREN = 120;
2530
+ /** Hard upper bound for the per-ref (targeted) children expansion limit. */
2531
+ const MAX_EXPANDED_CHILDREN_LIMIT = 300;
2532
/**
 * Normalize an attribute value before it is embedded in a DOM snapshot, so
 * long base64 / data-URL payloads are never injected verbatim.
 *
 * Rules, applied in order to the trimmed value:
 *  1. null/undefined or blank input yields "".
 *  2. A data URL whose metadata contains ";base64", or whose payload is longer
 *     than 64 characters, becomes `data:<meta, at most 48 chars>,<omitted:N>`
 *     where N is the payload length.
 *  3. A bare base64-looking string (80+ base64 characters, optional padding)
 *     becomes `<base64:N>` where N is the trimmed length.
 *  4. Anything longer than MAX_SNAPSHOT_ATTR_VALUE_LENGTH is cut to that
 *     length and suffixed with "...".
 *  5. Otherwise the trimmed value is returned unchanged.
 *
 * @param {string} value - Raw attribute (or form control) value.
 * @returns {string} Snapshot-safe representation of the value.
 */
function sanitizeSnapshotAttrValue(value) {
  // Tolerate missing / non-string input instead of throwing on `.trim()`.
  if (value == null) return "";
  const trimmed = String(value).trim();
  if (!trimmed) return "";

  // `[\s\S]` rather than `.`: `.` stops at line terminators, which let data
  // URLs containing a line break slip through with their payload intact.
  const dataUrlMatch = trimmed.match(/^data:([^,]*),([\s\S]*)$/i);
  if (dataUrlMatch) {
    const [, meta, payload] = dataUrlMatch;
    if (/;base64/i.test(meta) || payload.length > 64) {
      return `data:${meta.slice(0, 48)},<omitted:${payload.length}>`;
    }
    // Short, non-base64 data URLs fall through to the generic rules below.
  }

  if (/^[A-Za-z0-9+/]{80,}={0,2}$/.test(trimmed)) return `<base64:${trimmed.length}>`;
  if (trimmed.length > MAX_SNAPSHOT_ATTR_VALUE_LENGTH) {
    return `${trimmed.slice(0, MAX_SNAPSHOT_ATTR_VALUE_LENGTH)}...`;
  }
  return trimmed;
}
2450
2551
  /**
2451
2552
  * 生成页面 DOM 快照 — 将 DOM 树转为 AI 可理解的文本描述。
2452
2553
  *
@@ -2475,6 +2576,9 @@ function generateSnapshot(root = document.body, options = {}) {
2475
2576
  const maxNodes = opts.maxNodes ?? 220;
2476
2577
  const maxChildren = opts.maxChildren ?? 25;
2477
2578
  const maxTextLength = opts.maxTextLength ?? 40;
2579
+ const expandOptionLists = opts.expandOptionLists ?? false;
2580
+ const expandedChildrenLimit = Math.min(MAX_EXPANDED_CHILDREN_LIMIT, Math.max(1, opts.expandedChildrenLimit ?? MAX_EXPANDED_LIST_CHILDREN));
2581
+ const expandChildrenRefSet = new Set((opts.expandChildrenRefs ?? []).map((ref) => ref.trim().replace(/^#/, "")).filter(Boolean));
2478
2582
  let emittedNodes = 0;
2479
2583
  let truncatedByNodeBudget = false;
2480
2584
  const refStore = opts.refStore;
@@ -2589,6 +2693,26 @@ function generateSnapshot(root = document.body, options = {}) {
2589
2693
  if (el.hasAttribute("aria-label")) return true;
2590
2694
  return false;
2591
2695
  }
2696
+ /**
 * Heuristically decide whether `el` is an "option list" container
 * (time spinner column, dropdown, listbox, and similar).
 *
 * Returns true when any of the following holds:
 *  - the `role` attribute is exactly "listbox";
 *  - the lowercased `class` attribute contains one of the substrings
 *    "time-spinner__list", "select-dropdown", "virtual-list" or "option";
 *  - the element is a UL with at least 20 element children, of which at
 *    least 80% are LI.
 * Otherwise returns false.
 */
2697
+ function isOptionListContainer(el) {
2698
+ if (el.getAttribute("role") === "listbox") return true;
2699
+ const cls = (el.getAttribute("class") || "").toLowerCase();
2700
+ if (cls.includes("time-spinner__list") || cls.includes("select-dropdown") || cls.includes("virtual-list") || cls.includes("option")) return true; // substring match on the whole class string, not per class token
2701
+ if (el.tagName === "UL") {
2702
+ const children = Array.from(el.children);
2703
+ if (children.length >= 20) { // only long lists qualify
2704
+ if (children.filter((child) => child.tagName === "LI").length / children.length >= .8) return true; // share of LI children is at least 80%
2705
+ }
2706
+ }
2707
+ return false;
2708
+ }
2709
+ /**
 * Compute the child-truncation limit to use for one element.
 *
 * Starts from `defaultLimit` and only ever raises it:
 *  - to at least MAX_EXPANDED_LIST_CHILDREN when `expandOptionLists` is
 *    enabled and `el` is recognized by isOptionListContainer;
 *  - to at least `expandedChildrenLimit` when `hashId` is truthy and is
 *    present in `expandChildrenRefSet`.
 *
 * `expandOptionLists`, `expandedChildrenLimit` and `expandChildrenRefSet`
 * are read from the enclosing scope.
 *
 * @param el - Element whose children are about to be truncated.
 * @param defaultLimit - Baseline limit (callers in view pass `maxChildren`).
 * @param hashId - Ref id of `el`, or undefined when no ref store is in use.
 * @returns The limit to apply when slicing the element's children.
 */
2710
+ function resolveChildLimit(el, defaultLimit, hashId) {
2711
+ let nextLimit = defaultLimit;
2712
+ if (expandOptionLists && isOptionListContainer(el)) nextLimit = Math.max(nextLimit, MAX_EXPANDED_LIST_CHILDREN);
2713
+ if (hashId && expandChildrenRefSet.has(hashId)) nextLimit = Math.max(nextLimit, expandedChildrenLimit); // explicitly requested refs get the caller-supplied limit
2714
+ return nextLimit;
2715
+ }
2592
2716
  function walk(el, depth, parentPath) {
2593
2717
  if (emittedNodes >= maxNodes) {
2594
2718
  truncatedByNodeBudget = true;
@@ -2603,6 +2727,7 @@ function generateSnapshot(root = document.body, options = {}) {
2603
2727
  const indent = " ".repeat(depth);
2604
2728
  const tag = el.tagName.toLowerCase();
2605
2729
  const currentPath = `${parentPath}/${tag}${getSiblingIndex(el)}`;
2730
+ const hashId = refStore ? refStore.set(el, currentPath) : void 0;
2606
2731
  const attrs = [];
2607
2732
  const elId = el.getAttribute("id");
2608
2733
  if (elId) attrs.push(`id="${elId}"`);
@@ -2613,7 +2738,10 @@ function generateSnapshot(root = document.body, options = {}) {
2613
2738
  }
2614
2739
  for (const attr of INTERACTIVE_ATTRS) {
2615
2740
  const val = el.getAttribute(attr);
2616
- if (val) attrs.push(`${attr}="${val}"`);
2741
+ if (val) {
2742
+ const safeVal = sanitizeSnapshotAttrValue(val);
2743
+ if (safeVal) attrs.push(`${attr}="${safeVal}"`);
2744
+ }
2617
2745
  }
2618
2746
  for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
2619
2747
  if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement || el instanceof HTMLButtonElement) {
@@ -2624,15 +2752,18 @@ function generateSnapshot(root = document.body, options = {}) {
2624
2752
  }
2625
2753
  if (el.hasAttribute("onclick")) attrs.push("onclick");
2626
2754
  const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
2627
- if (testId) attrs.push(`data-testid="${testId.slice(0, 25)}"`);
2755
+ if (testId) {
2756
+ const safeTestId = sanitizeSnapshotAttrValue(testId).slice(0, 25);
2757
+ if (safeTestId) attrs.push(`data-testid="${safeTestId}"`);
2758
+ }
2628
2759
  if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.value) {
2629
- const currentVal = el.value.slice(0, 40);
2760
+ const currentVal = sanitizeSnapshotAttrValue(el.value).slice(0, 40);
2630
2761
  if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
2631
2762
  }
2632
2763
  if (el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio") && el.checked) {
2633
2764
  if (!attrs.includes("checked")) attrs.push("checked");
2634
2765
  }
2635
- if (el instanceof HTMLSelectElement && el.value) attrs.push(`val="${el.value.slice(0, 40)}"`);
2766
+ if (el instanceof HTMLSelectElement && el.value) attrs.push(`val="${sanitizeSnapshotAttrValue(el.value).slice(0, 40)}"`);
2636
2767
  if (el instanceof HTMLOptionElement && el.selected) {
2637
2768
  if (!attrs.includes("selected")) attrs.push("selected");
2638
2769
  }
@@ -2650,7 +2781,8 @@ function generateSnapshot(root = document.body, options = {}) {
2650
2781
  const interactiveChildren = allChildren.filter(isInteractiveElement);
2651
2782
  const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
2652
2783
  const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
2653
- const selectedChildren = orderedChildren.slice(0, maxChildren);
2784
+ const childLimit = resolveChildLimit(el, maxChildren, hashId);
2785
+ const selectedChildren = orderedChildren.slice(0, childLimit);
2654
2786
  const omittedChildren = orderedChildren.length - selectedChildren.length;
2655
2787
  const childBlocks = [];
2656
2788
  for (let i = 0; i < selectedChildren.length; i++) {
@@ -2668,17 +2800,16 @@ function generateSnapshot(root = document.body, options = {}) {
2668
2800
  let line = `${indent}[${tag}]`;
2669
2801
  if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
2670
2802
  if (attrs.length) line += ` ${attrs.join(" ")}`;
2671
- if (refStore) {
2672
- const hashId = refStore.set(el, currentPath);
2673
- line += ` #${hashId}`;
2674
- } else line += ` ref="${currentPath}"`;
2803
+ if (hashId) line += ` #${hashId}`;
2804
+ else line += ` ref="${currentPath}"`;
2675
2805
  const lines = [line];
2676
2806
  emittedNodes++;
2677
2807
  const allChildren = Array.from(el.children);
2678
2808
  const interactiveChildren = allChildren.filter(isInteractiveElement);
2679
2809
  const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
2680
2810
  const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
2681
- const selectedChildren = orderedChildren.slice(0, maxChildren);
2811
+ const childLimit = resolveChildLimit(el, maxChildren, hashId);
2812
+ const selectedChildren = orderedChildren.slice(0, childLimit);
2682
2813
  const omittedChildren = orderedChildren.length - selectedChildren.length;
2683
2814
  for (let i = 0; i < selectedChildren.length; i++) {
2684
2815
  const childResult = walk(selectedChildren[i], depth + 1, currentPath);
@@ -2737,7 +2868,10 @@ function createPageInfoTool() {
2737
2868
  pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" })),
2738
2869
  maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 220)" })),
2739
2870
  maxChildren: Type.Optional(Type.Number({ description: "Maximum children per element (default: 25)" })),
2740
- maxTextLength: Type.Optional(Type.Number({ description: "Maximum text length per node (default: 40)" }))
2871
+ maxTextLength: Type.Optional(Type.Number({ description: "Maximum text length per node (default: 40)" })),
2872
+ expandOptionLists: Type.Optional(Type.Boolean({ description: "Expand option-list containers to avoid child truncation (default: false)" })),
2873
+ expandChildrenRefs: Type.Optional(Type.Array(Type.String({ description: "Hash refs to expand child truncation for (e.g. #abc123)" }))),
2874
+ expandedChildrenLimit: Type.Optional(Type.Number({ description: "Child limit for expandChildrenRefs nodes (default: 120, max: 300)" }))
2741
2875
  }),
2742
2876
  execute: async (params) => {
2743
2877
  const action = params.action;
@@ -2764,6 +2898,9 @@ function createPageInfoTool() {
2764
2898
  const maxNodes = params.maxNodes ?? 220;
2765
2899
  const maxChildren = params.maxChildren ?? 25;
2766
2900
  const maxTextLength = params.maxTextLength ?? 40;
2901
+ const expandOptionLists = params.expandOptionLists ?? false;
2902
+ const expandChildrenRefs = Array.isArray(params.expandChildrenRefs) ? params.expandChildrenRefs.filter((ref) => typeof ref === "string") : void 0;
2903
+ const expandedChildrenLimit = typeof params.expandedChildrenLimit === "number" ? params.expandedChildrenLimit : void 0;
2767
2904
  return { content: generateSnapshot(document.body, {
2768
2905
  maxDepth,
2769
2906
  viewportOnly,
@@ -2771,6 +2908,9 @@ function createPageInfoTool() {
2771
2908
  maxNodes,
2772
2909
  maxChildren,
2773
2910
  maxTextLength,
2911
+ expandOptionLists,
2912
+ expandChildrenRefs,
2913
+ expandedChildrenLimit,
2774
2914
  refStore: getActiveRefStore()
2775
2915
  }) };
2776
2916
  }