agentpage 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -10,7 +10,25 @@ const DEFAULT_MAX_ROUNDS = 40;
10
10
  const DEFAULT_RECOVERY_WAIT_MS = 100;
11
11
  const DEFAULT_ACTION_RECOVERY_ROUNDS = 2;
12
12
  const DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2;
13
- const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2e3;
13
+ const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 1e3;
14
/** Default timeout (ms) handed to each wait step of the post-round stability barrier. */
const DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS = 4000;
/** Default length (ms) of the DOM quiet window requested from the `wait_for_stable` step. */
const DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS = 200;
/**
 * Default CSS selectors treated as "loading" indicators.
 * The stability barrier joins them into one selector list and waits for it to be hidden.
 */
const DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS = [
	// Component-library specific spinner / skeleton / mask classes.
	'.ant-spin',
	'.ant-spin-spinning',
	'.ant-skeleton',
	'.el-loading-mask',
	'.bk-loading',
	'.bk-spin-loading',
	'.bk-skeleton',
	'.bk-sideslider-loading',
	'.t-loading',
	'.t-skeleton',
	'.t-skeleton__row',
	// Generic markers.
	'[aria-busy="true"]',
	'.skeleton',
	'.loading'
];
14
32
  /** 快照起始标记 — 用于在消息中识别快照边界 */
15
33
  const SNAPSHOT_START = "<!-- SNAPSHOT_START -->";
16
34
  /** 快照结束标记 */
@@ -163,6 +181,36 @@ function shouldForceRoundBreak(toolName, toolInput) {
163
181
  return toolName === "evaluate";
164
182
  }
165
183
/**
 * Decide whether a tool call may change the page's structure or state.
 *
 * The answer gates the post-round stability wait:
 * - true: once the round ends, wait for loading indicators and then for a quiet DOM
 * - false: skip the wait and move straight on to the next round
 *
 * @param toolName - name of the tool that was called
 * @param toolInput - input of that call; its action is read through getToolAction
 * @returns true for `navigate`, `evaluate`, and state-changing `dom` actions
 */
function isPotentialDomMutation(toolName, toolInput) {
	const action = getToolAction(toolInput);
	switch (toolName) {
		case "navigate":
		case "evaluate":
			return true;
		case "dom":
			break;
		default:
			return false;
	}
	if (!action) return false;
	const mutatingDomActions = new Set([
		"click",
		"fill",
		"select_option",
		"clear",
		"check",
		"uncheck",
		"type",
		"focus",
		"hover",
		"scroll",
		"press",
		"set_attr",
		"add_class",
		"remove_class"
	]);
	return mutatingDomActions.has(action);
}
213
+ /**
166
214
  * 采集找不到元素任务。
167
215
  *
168
216
  * 返回 null 表示当前结果不属于“元素未找到”,
@@ -247,7 +295,7 @@ function hasToolError(result) {
247
295
  * 4) 返回稳定字符串给 loop,供后续注入消息与统计。
248
296
  *
249
297
  * 默认参数意图:
250
- * - `maxDepth=8`: 保留足够层级,减少关键控件被截断。
298
+ * - `maxDepth=12`: 保留更深层级,减少深层组件控件被截断。
251
299
  * - `viewportOnly=false`: 优先完整性,避免误判“元素不存在”。
252
300
  * - `pruneLayout=true`: 抑制纯布局噪声,降低 token 压力。
253
301
  * - `maxNodes=500` / `maxChildren=30`: 控制体积上限,兼顾可读性。
@@ -272,7 +320,7 @@ function hasToolError(result) {
272
320
  async function readPageSnapshot(registry, options) {
273
321
  return toContentString((await registry.dispatch("page_info", {
274
322
  action: "snapshot",
275
- maxDepth: options?.maxDepth ?? 8,
323
+ maxDepth: options?.maxDepth ?? 12,
276
324
  viewportOnly: options?.viewportOnly ?? false,
277
325
  pruneLayout: options?.pruneLayout ?? true,
278
326
  maxNodes: options?.maxNodes ?? 500,
@@ -696,7 +744,7 @@ function detectIdleLoop(toolCalls, consecutiveReadOnlyRounds) {
696
744
  * - 达到 `maxRounds`
697
745
  */
698
746
  async function executeAgentLoop(params) {
699
- const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
747
+ const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, roundStabilityWait, callbacks } = params;
700
748
  const tools = registry.getDefinitions();
701
749
  const allToolCalls = [];
702
750
  const fullToolTrace = [];
@@ -717,6 +765,12 @@ async function executeAgentLoop(params) {
717
765
  let lastRoundHadError = false;
718
766
  let protocolViolationHint;
719
767
  const snapshotExpandRefIds = /* @__PURE__ */ new Set();
768
+ const effectiveRoundStabilityWait = {
769
+ enabled: roundStabilityWait?.enabled ?? true,
770
+ timeoutMs: Math.max(200, Math.floor(roundStabilityWait?.timeoutMs ?? DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS)),
771
+ quietMs: Math.max(50, Math.floor(roundStabilityWait?.quietMs ?? DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS)),
772
+ loadingSelectors: [...new Set([...DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS, ...roundStabilityWait?.loadingSelectors ?? []].map((selector) => selector.trim()).filter(Boolean))]
773
+ };
720
774
  let recoveryCount = 0;
721
775
  let redundantInterceptCount = 0;
722
776
  let pendingNotFoundRetry;
@@ -748,6 +802,30 @@ async function executeAgentLoop(params) {
748
802
  } : void 0);
749
803
  recordSnapshotStats(pageContext.latestSnapshot);
750
804
  };
805
+ /**
806
+ * 轮次后稳定等待(双重等待)。
807
+ *
808
+ * 顺序固定为:
809
+ * 1) 等待 loading 指示器隐藏
810
+ * 2) 等待 DOM quiet window
811
+ */
812
+ const runRoundStabilityBarrier = async () => {
813
+ if (!effectiveRoundStabilityWait.enabled) return;
814
+ if (!registry.has("wait")) return;
815
+ const timeout = effectiveRoundStabilityWait.timeoutMs;
816
+ const loadingSelector = effectiveRoundStabilityWait.loadingSelectors.join(", ");
817
+ if (loadingSelector) await registry.dispatch("wait", {
818
+ action: "wait_for_selector",
819
+ selector: loadingSelector,
820
+ state: "hidden",
821
+ timeout
822
+ });
823
+ await registry.dispatch("wait", {
824
+ action: "wait_for_stable",
825
+ timeout,
826
+ quietMs: effectiveRoundStabilityWait.quietMs
827
+ });
828
+ };
751
829
  if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
752
830
  /**
753
831
  * 追加工具轨迹。
@@ -863,6 +941,7 @@ async function executeAgentLoop(params) {
863
941
  break;
864
942
  }
865
943
  let roundHasError = false;
944
+ let roundHasPotentialDomMutation = false;
866
945
  const executedTaskCalls = [];
867
946
  const roundMissingTasks = [];
868
947
  for (const tc of response.toolCalls) {
@@ -893,6 +972,7 @@ async function executeAgentLoop(params) {
893
972
  const missingTask = collectMissingTask(tc.name, tc.input, result);
894
973
  if (missingTask) roundMissingTasks.push(missingTask);
895
974
  if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
975
+ if (!hasToolError(result) && isPotentialDomMutation(tc.name, tc.input)) roundHasPotentialDomMutation = true;
896
976
  if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
897
977
  pageContext.latestSnapshot = toContentString(result.content);
898
978
  recordSnapshotStats(pageContext.latestSnapshot);
@@ -916,6 +996,11 @@ async function executeAgentLoop(params) {
916
996
  lastRoundHadError = roundHasError;
917
997
  previousRoundTasks = buildTaskArray(executedTaskCalls);
918
998
  previousRoundPlannedTasks = plannedTasksCurrentRound;
999
+ if (parsedInstructionState.hasRemainingProtocol && remainingInstruction.trim().length === 0 && !roundHasError) {
1000
+ finalReply = response.text?.trim() || "任务已完成。";
1001
+ if (finalReply) callbacks?.onText?.(finalReply);
1002
+ break;
1003
+ }
919
1004
  const idleResult = detectIdleLoop(response.toolCalls.map((tc) => ({
920
1005
  name: tc.name,
921
1006
  input: tc.input
@@ -926,6 +1011,7 @@ async function executeAgentLoop(params) {
926
1011
  break;
927
1012
  }
928
1013
  consecutiveReadOnlyRounds = idleResult;
1014
+ if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
929
1015
  await refreshSnapshot();
930
1016
  }
931
1017
  const resultMessages = [...history ?? [], {
@@ -1575,7 +1661,13 @@ var ToolRegistry = class {
1575
1661
 
1576
1662
  //#endregion
1577
1663
  //#region src/core/system-prompt.ts
1578
- /** 规范化额外指令。 */
1664
+ /**
1665
+ * 规范化额外指令:统一转为非空字符串数组。
1666
+ *
1667
+ * - 单字符串 → 单元素数组
1668
+ * - 字符串数组 → 过滤空值
1669
+ * - undefined → 空数组
1670
+ */
1579
1671
  function normalizeExtraInstructions(input) {
1580
1672
  if (!input) return [];
1581
1673
  return (Array.isArray(input) ? input : [input]).map((s) => s.trim()).filter(Boolean);
@@ -1583,9 +1675,32 @@ function normalizeExtraInstructions(input) {
1583
1675
  /**
1584
1676
  * 构建系统提示词。
1585
1677
  *
1586
- * 约束:
1587
- * - 输出给模型的提示词正文统一为英文。
1588
- * - 中文仅用于源码注释,便于团队维护。
1678
+ * 输出结构(按章节顺序):
1679
+ * 1. **Core Rules** — Agent 核心行为规则
1680
+ * - 快照驱动决策:仅基于当前快照 + 剩余任务工作
1681
+ * - 增量消费模型:每轮执行后输出 REMAINING 推进任务
1682
+ * - hash ID 定位:仅交互元素携带 #hashID,非交互元素为上下文
1683
+ * - 事件信号:listeners="..." 标注运行时事件绑定
1684
+ * - 批量执行:同轮完成所有独立可见操作
1685
+ * - 输入顺序:fill/type 前必须先 focus/click 同一目标
1686
+ * - DOM 变化断轮:会改变 DOM 的动作执行后等待下一轮新快照
1687
+ * - 停机规则:任务完成后输出 REMAINING: DONE
1688
+ *
1689
+ * 2. **Listener Abbrevs** — 事件简写对照表
1690
+ * - 快照中 listeners="clk,inp,chg" 的简写含义
1691
+ * - 与 page-info-tool.ts 的 EVENT_ABBREV 映射一致
1692
+ *
1693
+ * 3. **Output Contract** — 输出协议
1694
+ * - 每轮返回工具调用 + REMAINING 文本行
1695
+ *
1696
+ * 4. **Available Tools**(可选) — 当前注册的工具及描述
1697
+ *
1698
+ * 5. **Reasoning Profile**(可选) — 思考深度配置
1699
+ *
1700
+ * 6. **Extra Instructions**(可选) — 用户自定义额外指令
1701
+ *
1702
+ * @param params - 构建参数(工具列表、思考深度、额外指令)
1703
+ * @returns 完整的系统提示词字符串(英文)
1589
1704
  */
1590
1705
  function buildSystemPrompt(params = {}) {
1591
1706
  const sections = [];
@@ -1598,6 +1713,12 @@ function buildSystemPrompt(params = {}) {
1598
1713
  " Input: (1) current remaining task, (2) previous round executed actions, (3) actions you execute this round.",
1599
1714
  " Output: new remaining task after removing this-round actions.",
1600
1715
  "- Use only visible targets from snapshot. Use #hashID as selector. Do not guess CSS selectors.",
1716
+ "- Only interactive elements (with events, inputs, buttons, links, etc.) carry #hashID. Elements without #hashID are context-only (labels, headings, text) and cannot be targeted.",
1717
+ "- Snapshot tag in brackets may show ARIA role instead of HTML tag when it better describes the interaction pattern (e.g. [combobox] for input with role=\"combobox\", [slider] for div with role=\"slider\"). Treat the bracket tag as the primary interaction hint.",
1718
+ "- listeners=\"...\" on snapshot indicates bound event handlers (see Listener Abbrevs below). Prefer targets with relevant listeners when multiple candidates look similar.",
1719
+ "- Click targeting rule (MANDATORY): for click/navigation actions, prioritize elements with explicit click signals (listeners containing clk/pdn/mdn, onclick, native link/button semantics, or role=button/link).",
1720
+ "- Do NOT click focus/hover-only nodes for navigation (e.g. listeners only like fcs/blr/men/mlv without click-related signals). Treat those as context labels unless no better actionable target exists.",
1721
+ "- Correlation fallback: if a click produced no progress, in the next round choose the nearest actionable sibling/ancestor within the same semantic group (same row/card/form), such as adjacent repo path/link/button, instead of repeating the same ineffective target.",
1601
1722
  "- Batch independent visible actions in one round. Do not split one form into many rounds unnecessarily.",
1602
1723
  "- Strict input order (MANDATORY): before every fill/type/select_option, click or focus the SAME target immediately in the SAME round.",
1603
1724
  "- Multi-field rule (MANDATORY): execute alternating pairs in one batch: focus/click field A -> fill/type A -> focus/click field B -> fill/type B.",
@@ -1616,6 +1737,9 @@ function buildSystemPrompt(params = {}) {
1616
1737
  "- Stop rule: when the requested state is achieved, stop calling tools. If verification is requested, verify once and then return REMAINING: DONE (no repeated get_text/get_attr on the same target).",
1617
1738
  "- Do NOT interact with AutoPilot UI unless user explicitly asks.",
1618
1739
  "",
1740
+ "## Listener Abbrevs",
1741
+ "clk=click dbl=dblclick mdn=mousedown mup=mouseup mmv=mousemove mov=mouseover mot=mouseout men=mouseenter mlv=mouseleave pdn=pointerdown pup=pointerup pmv=pointermove tst=touchstart ted=touchend kdn=keydown kup=keyup inp=input chg=change sub=submit fcs=focus blr=blur scl=scroll whl=wheel drg=drag drs=dragstart dre=dragend drp=drop ctx=contextmenu",
1742
+ "",
1619
1743
  "## Output Contract",
1620
1744
  "- Return tool calls for this round.",
1621
1745
  "- Also include one plain text line:",
@@ -1642,6 +1766,81 @@ function buildSystemPrompt(params = {}) {
1642
1766
  return sections.join("\n\n");
1643
1767
  }
1644
1768
 
1769
+ //#endregion
1770
+ //#region src/web/event-listener-tracker.ts
1771
/**
 * Per-element listener registry.
 *
 * Shape: Element -> Map<eventType, { count, masks }>
 * - `masks` is a WeakMap<listener, phaseMask> recording in which phases
 *   (bubble / capture) a listener is registered for that type.
 * - `count` is the number of distinct (listener, phase) registrations.
 *
 * Counting registrations (instead of keeping a bare set of type names) keeps a
 * type listed until its last tracked listener has been removed.
 */
const elementEventMap = /* @__PURE__ */ new WeakMap();
let installed = false;
let originalAddEventListener;
let originalRemoveEventListener;
const LISTENER_PHASE_BUBBLE = 1;
const LISTENER_PHASE_CAPTURE = 2;
/** Stand-in key used when track/untrack is called without a listener. */
const UNSPECIFIED_LISTENER_KEY = {};
/**
 * Normalize an event type to a trimmed, lower-cased string.
 * Returns null for non-strings and for strings that are empty after trimming.
 */
function normalizeEventType(type) {
	if (typeof type !== "string") return null;
	return type.trim().toLowerCase() || null;
}
/** Only DOM Elements are tracked; always false where `Element` is not defined. */
function canTrackElementTarget(target) {
	if (typeof Element === "undefined") return false;
	return target instanceof Element;
}
/**
 * Reduce the third addEventListener/removeEventListener argument to its capture flag:
 * nullish -> false, object -> Boolean(options.capture), anything else -> Boolean(options).
 */
function resolveListenerCapture(options) {
	if (options == null) return false;
	if (typeof options === "object" || typeof options === "function") return Boolean(options.capture);
	return Boolean(options);
}
/** True when the value can be used as a WeakMap key. */
function isWeakKey(value) {
	return (typeof value === "object" && value !== null) || typeof value === "function";
}
/**
 * Record that `listener` is bound to `target` for `type` in the given phase.
 * Registering the same (listener, phase) pair again is a no-op, mirroring how the
 * DOM discards duplicate listeners.
 *
 * @param target - event target; ignored unless it is an Element
 * @param type - normalized event type
 * @param listener - callback being registered (optional for legacy two-argument calls)
 * @param capture - true for the capture phase, false for the bubble phase
 */
function trackElementEvent(target, type, listener, capture = false) {
	if (!canTrackElementTarget(target)) return;
	const key = listener ?? UNSPECIFIED_LISTENER_KEY;
	if (!isWeakKey(key)) return;
	const phaseBit = capture ? LISTENER_PHASE_CAPTURE : LISTENER_PHASE_BUBBLE;
	let byType = elementEventMap.get(target);
	if (!byType) {
		byType = /* @__PURE__ */ new Map();
		elementEventMap.set(target, byType);
	}
	let entry = byType.get(type);
	if (!entry) {
		entry = {
			count: 0,
			masks: /* @__PURE__ */ new WeakMap()
		};
		byType.set(type, entry);
	}
	const previousMask = entry.masks.get(key) ?? 0;
	if (previousMask & phaseBit) return;
	entry.masks.set(key, previousMask | phaseBit);
	entry.count += 1;
}
/**
 * Forget one (listener, phase) registration. The type disappears from the element's
 * record only when no tracked registration for it remains; removing a listener that
 * was never tracked changes nothing.
 *
 * @param target - event target; ignored unless it is an Element
 * @param type - normalized event type
 * @param listener - callback being removed (optional for legacy two-argument calls)
 * @param capture - true for the capture phase, false for the bubble phase
 */
function untrackElementEvent(target, type, listener, capture = false) {
	if (!canTrackElementTarget(target)) return;
	const byType = elementEventMap.get(target);
	const entry = byType?.get(type);
	if (!entry) return;
	const key = listener ?? UNSPECIFIED_LISTENER_KEY;
	if (!isWeakKey(key)) return;
	const phaseBit = capture ? LISTENER_PHASE_CAPTURE : LISTENER_PHASE_BUBBLE;
	const previousMask = entry.masks.get(key) ?? 0;
	if (!(previousMask & phaseBit)) return;
	const remainingMask = previousMask & ~phaseBit;
	if (remainingMask) entry.masks.set(key, remainingMask);
	else entry.masks.delete(key);
	entry.count -= 1;
	if (entry.count <= 0) byType.delete(type);
	if (byType.size === 0) elementEventMap.delete(target);
}
/**
 * Install the global listener-tracking patch (idempotent).
 *
 * Wraps EventTarget.prototype.addEventListener / removeEventListener so that every
 * successful call on an Element is mirrored into the registry. The native method is
 * always invoked first; bookkeeping runs afterwards.
 *
 * NOTE(review): listeners dropped without a removeEventListener call (e.g. `once`
 * or an aborted `signal`) are not observed here and stay recorded — confirm whether
 * callers need that handled.
 */
function installEventListenerTracking() {
	if (installed) return;
	if (typeof EventTarget === "undefined") return;
	const proto = EventTarget.prototype;
	const nativeAdd = proto.addEventListener;
	const nativeRemove = proto.removeEventListener;
	if (typeof nativeAdd !== "function" || typeof nativeRemove !== "function") return;
	originalAddEventListener = nativeAdd;
	originalRemoveEventListener = nativeRemove;
	proto.addEventListener = function patchedAddEventListener(type, listener, options) {
		originalAddEventListener?.call(this, type, listener, options);
		try {
			const normalizedType = normalizeEventType(type);
			if (!normalizedType || listener == null) return;
			trackElementEvent(this, normalizedType, listener, resolveListenerCapture(options));
		} catch {
			// Deliberate best-effort: bookkeeping must never break the page's own listener calls.
		}
	};
	proto.removeEventListener = function patchedRemoveEventListener(type, listener, options) {
		originalRemoveEventListener?.call(this, type, listener, options);
		try {
			const normalizedType = normalizeEventType(type);
			if (!normalizedType || listener == null) return;
			untrackElementEvent(this, normalizedType, listener, resolveListenerCapture(options));
		} catch {
			// Deliberate best-effort: bookkeeping must never break the page's own listener calls.
		}
	};
	installed = true;
}
/**
 * Return the event types recorded for an element, sorted for stable output.
 */
function getTrackedElementEvents(el) {
	const byType = elementEventMap.get(el);
	if (!byType || byType.size === 0) return [];
	return Array.from(byType.keys()).sort();
}
/**
 * Report whether at least one tracked listener is recorded for the element.
 */
function hasTrackedElementEvents(el) {
	return (elementEventMap.get(el)?.size ?? 0) > 0;
}
1843
+
1645
1844
  //#endregion
1646
1845
  //#region src/web/tools/dom-tool.ts
1647
1846
  /**
@@ -1661,7 +1860,7 @@ function buildSystemPrompt(params = {}) {
1661
1860
  *
1662
1861
  * 运行环境:浏览器 Content Script(直接访问 DOM,无 CDP)。
1663
1862
  */
1664
- const DEFAULT_WAIT_MS = 2e3;
1863
+ const DEFAULT_WAIT_MS = 1200;
1665
1864
  /** scrollIntoView 轮换策略(参考 Playwright dom.ts) */
1666
1865
  const SCROLL_OPTIONS = [
1667
1866
  void 0,
@@ -1721,6 +1920,16 @@ const KEY_CODE_MAP = {
1721
1920
  Alt: "AltLeft",
1722
1921
  Meta: "MetaLeft"
1723
1922
  };
1923
/**
 * Event types that count toward an element's fill-support score;
 * tracked listeners of any other type are ignored by the scoring.
 */
const FILL_RELEVANT_EVENTS = new Set([
	// value / focus lifecycle
	'input', 'change', 'focus', 'blur',
	// keyboard
	'keydown',
	// pointer
	'click', 'mousedown', 'pointerdown'
]);
1724
1933
  let activeRefStore;
1725
1934
  function setActiveRefStore(store) {
1726
1935
  activeRefStore = store;
@@ -1929,15 +2138,34 @@ function ensureActionable(el, action, selector, force) {
1929
2138
  "fill",
1930
2139
  "type",
1931
2140
  "clear"
1932
- ].includes(action) && !isEditableElement(el)) return {
1933
- content: `"${selector}" 不是可编辑元素,无法执行 ${action}`,
1934
- details: {
1935
- error: true,
1936
- code: "UNSUPPORTED_FILL_TARGET",
1937
- action,
1938
- selector
1939
- }
1940
- };
2141
+ ].includes(action) && !isEditableElement(el)) {
2142
+ if (action === "fill" && el.getAttribute("role") === "slider") return null;
2143
+ return {
2144
+ content: `"${selector}" 不是可编辑元素,无法执行 ${action}`,
2145
+ details: {
2146
+ error: true,
2147
+ code: "UNSUPPORTED_FILL_TARGET",
2148
+ action,
2149
+ selector
2150
+ }
2151
+ };
2152
+ }
2153
+ return null;
2154
+ }
2155
/**
 * Find the numeric input associated with a role=slider element.
 * Typical case: an Element Plus slider and its input-number inside one form item.
 *
 * Scopes are searched in this order: the closest `.el-form-item` ancestor (if any),
 * then up to four ancestors starting at the direct parent. Inside a scope every
 * matching input is checked in document order, so a hidden or non-editable first
 * match cannot mask a usable input further down the same scope.
 *
 * @param slider - the role=slider element
 * @returns the first matching HTMLInputElement that is editable and visible, or null
 */
function findAssociatedSliderInput(slider) {
	const inputSelector = "input[type=\"number\"], input[role=\"spinbutton\"], .el-input-number input:not([type=\"hidden\"])";
	const scopes = [];
	const formItem = slider.closest(".el-form-item");
	if (formItem) scopes.push(formItem);
	let cursor = slider.parentElement;
	for (let depth = 0; cursor && depth < 4; depth++, cursor = cursor.parentElement) scopes.push(cursor);
	for (const scope of scopes) {
		for (const input of Array.from(scope.querySelectorAll(inputSelector))) {
			if (input instanceof HTMLInputElement && isEditableElement(input) && isElementVisible(input)) return input;
		}
	}
	return null;
}
1943
2171
  function getClickPoint(el) {
@@ -2034,6 +2262,163 @@ function setNativeValue(el, value) {
2034
2262
  if (desc?.set) desc.set.call(el, value);
2035
2263
  else el.value = value;
2036
2264
  }
2265
/**
 * Score how strongly an element appears wired up to react to a fill.
 *
 * Two sources add to the score:
 * - inline handler attributes: oninput/onchange (+80), onfocus/onblur (+60), onclick (+40),
 *   each group counted once;
 * - tracked listeners whose type is in FILL_RELEVANT_EVENTS: input 40, change 35,
 *   focus/blur 28, keydown 24, any other relevant type 14.
 *
 * @param el - candidate element
 * @returns the accumulated score (0 when nothing matches)
 */
function getFillEventSupportScore(el) {
	const inlineHandlerBonuses = [
		[["oninput", "onchange"], 80],
		[["onfocus", "onblur"], 60],
		[["onclick"], 40]
	];
	const trackedEventWeights = new Map([
		["input", 40],
		["change", 35],
		["focus", 28],
		["blur", 28],
		["keydown", 24]
	]);
	const fallbackWeight = 14;
	let total = 0;
	for (const [attributeNames, bonus] of inlineHandlerBonuses) {
		if (attributeNames.some((attributeName) => el.hasAttribute(attributeName))) total += bonus;
	}
	for (const eventName of getTrackedElementEvents(el)) {
		if (FILL_RELEVANT_EVENTS.has(eventName)) total += trackedEventWeights.get(eventName) ?? fallbackWeight;
	}
	return total;
}
2281
/**
 * Tell whether an element may serve as a fill target:
 * an input / textarea / select that is not disabled, or a content-editable HTMLElement.
 */
function isCandidateFillTarget(el) {
	const isNativeFormControl = el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement;
	if (isNativeFormControl) return !isElementDisabled(el);
	return el instanceof HTMLElement && el.isContentEditable ? true : false;
}
2286
/**
 * Perform a fill on an already-resolved target element.
 *
 * Handled target kinds, in order: HTMLInputElement, HTMLTextAreaElement,
 * HTMLSelectElement, content-editable HTMLElement.
 *
 * Every failure path returns `details.error = true` with a code. That now includes
 * the `<select>` branch when no option matches the value, which is reported with
 * the existing FILL_NOT_APPLIED code instead of looking like a success.
 *
 * @param target - element to write into
 * @param value - string value to fill
 * @param selector - selector the caller used; echoed in messages and details
 * @param action - action name; echoed in details
 * @param sourceHint - optional note appended to the success message
 * @returns a tool result object, or null when the target is none of the handled kinds
 */
function executeFillOnResolvedTarget(target, value, selector, action, sourceHint) {
	const suffix = sourceHint ? `(${sourceHint})` : "";
	// Shared result builders so every branch reports in the same shape.
	const failure = (content, code) => ({
		content,
		details: {
			error: true,
			code,
			action,
			selector
		}
	});
	const success = (shownValue) => ({ content: `已填写 ${describeElement(target)}: "${shownValue}"${suffix}` });
	if (target instanceof HTMLInputElement) {
		const type = target.type.toLowerCase();
		if (INPUT_BLOCKED_TYPES.has(type)) return failure(`"${selector}" 为 input[type=${type}],不支持 fill;请使用 click/check 等动作。`, "UNSUPPORTED_FILL_TARGET");
		if (INPUT_SET_VALUE_TYPES.has(type)) {
			const finalVal = type === "color" ? value.toLowerCase().trim() : value.trim();
			target.focus();
			target.value = finalVal;
			// A read-back mismatch means the control did not accept the value as written.
			if (target.value !== finalVal) return failure(`"${selector}" 填写格式不匹配(type=${type})`, "MALFORMED_VALUE");
			dispatchInputEvents(target);
			return success(finalVal);
		}
		if (type === "number" && Number.isNaN(Number(value.trim()))) return failure(`"${selector}" 为 input[type=number],无法填写非数字 "${value}"`, "INVALID_NUMBER");
		scrollIntoViewIfNeeded(target);
		target.focus();
		selectText(target);
		setNativeValue(target, value);
		dispatchInputEvents(target);
		if (target.value !== value) return failure(`"${selector}" 填写后值不一致:期望 "${value}",实际 "${target.value}"`, "FILL_NOT_APPLIED");
		return success(value);
	}
	if (target instanceof HTMLTextAreaElement) {
		scrollIntoViewIfNeeded(target);
		target.focus();
		selectText(target);
		setNativeValue(target, value);
		dispatchInputEvents(target);
		return success(value);
	}
	if (target instanceof HTMLSelectElement) {
		target.focus();
		const options = Array.from(target.options);
		// Match on option value first, then on trimmed, case-insensitive option text.
		let matched = options.find((o) => o.value === value);
		if (!matched) {
			const normalized = value.trim().toLowerCase();
			matched = options.find((o) => o.text.trim().toLowerCase() === normalized);
		}
		if (!matched) return failure(`"${selector}" 下拉框中不存在选项 "${value}"`, "FILL_NOT_APPLIED");
		target.value = matched.value;
		dispatchInputEvents(target);
		return success(value);
	}
	if (target instanceof HTMLElement && target.isContentEditable) {
		target.focus();
		selectText(target);
		if (value) document.execCommand("insertText", false, value);
		else document.execCommand("delete", false, void 0);
		return success(value);
	}
	return null;
}
2374
/**
 * Heuristically pick a fillable element near `anchor`.
 *
 * Search scopes: the closest `.el-form-item` ancestor (level 0), then up to four
 * ancestors starting at the direct parent (levels 1-4). Each candidate is scored
 * once, in the first scope where it appears:
 * - base: 100 - level * 18
 * - plus getFillEventSupportScore(candidate)
 * - inputs: +80 for number / spinbutton inputs when the value is numeric,
 *   +36 for text-like inputs when it is not
 * - +8 each for a `placeholder` and an `aria-label` attribute
 * The highest score wins; on a tie the earlier candidate is kept.
 *
 * An empty or whitespace-only value is treated as non-numeric. `Number("")` is 0,
 * so without the explicit check such a value would steer the guess toward number
 * inputs.
 *
 * @param anchor - element the caller targeted
 * @param value - value about to be filled
 * @returns the best candidate element, or null when none qualifies
 */
function guessNearbyFillTarget(anchor, value) {
	const preferNumeric = String(value).trim() !== "" && Number.isFinite(Number(value));
	const scopeEntries = [];
	const formItem = anchor.closest(".el-form-item");
	if (formItem) scopeEntries.push({
		scope: formItem,
		level: 0
	});
	let cursor = anchor.parentElement;
	for (let level = 1; cursor && level <= 4; level++, cursor = cursor.parentElement) scopeEntries.push({
		scope: cursor,
		level
	});
	const textLikeTypes = [
		"text",
		"",
		"search",
		"email",
		"tel",
		"url",
		"password"
	];
	// Guarantees each element is scored at the nearest scope only.
	const visited = /* @__PURE__ */ new Set();
	let best = null;
	for (const { scope, level } of scopeEntries) {
		const candidates = Array.from(scope.querySelectorAll("input:not([type=\"hidden\"]), textarea, select, [contenteditable=\"true\"], [role=\"spinbutton\"]"));
		for (const candidate of candidates) {
			if (!(candidate instanceof Element)) continue;
			if (visited.has(candidate)) continue;
			visited.add(candidate);
			if (!isCandidateFillTarget(candidate)) continue;
			if (!isElementVisible(candidate)) continue;
			let score = 100 - level * 18;
			score += getFillEventSupportScore(candidate);
			if (candidate instanceof HTMLInputElement) {
				const type = candidate.type.toLowerCase();
				if (preferNumeric && (type === "number" || candidate.getAttribute("role") === "spinbutton")) score += 80;
				if (!preferNumeric && textLikeTypes.includes(type)) score += 36;
			}
			if (candidate.getAttribute("placeholder")) score += 8;
			if (candidate.getAttribute("aria-label")) score += 8;
			if (!best || score > best.score) best = {
				el: candidate,
				score
			};
		}
	}
	return best?.el ?? null;
}
2037
2422
  function selectText(el) {
2038
2423
  if (el instanceof HTMLInputElement) {
2039
2424
  el.select();
@@ -2253,7 +2638,7 @@ function createDomTool() {
2253
2638
  deltaY: Type.Optional(Type.Number({ description: "Vertical scroll delta for scroll action. Positive = down, negative = up." })),
2254
2639
  deltaX: Type.Optional(Type.Number({ description: "Horizontal scroll delta for scroll action." })),
2255
2640
  steps: Type.Optional(Type.Number({ description: "Repeat count for scroll action (default 1, max 20)." })),
2256
- waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 2000)." })),
2641
+ waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 1200)." })),
2257
2642
  waitSeconds: Type.Optional(Type.Number({ description: "Wait timeout in seconds (fallback for waitMs)." })),
2258
2643
  force: Type.Optional(Type.Boolean({ description: "Skip actionability checks (default false)." }))
2259
2644
  }),
@@ -2335,87 +2720,71 @@ function createDomTool() {
2335
2720
  const value = params.value;
2336
2721
  if (value === void 0) return { content: "缺少 value 参数" };
2337
2722
  const target = retarget(el, "follow-label");
2338
- if (target instanceof HTMLInputElement) {
2339
- const type = target.type.toLowerCase();
2340
- if (INPUT_BLOCKED_TYPES.has(type)) return {
2341
- content: `"${selector}" input[type=${type}],不支持 fill;请使用 click/check 等动作。`,
2342
- details: {
2343
- error: true,
2344
- code: "UNSUPPORTED_FILL_TARGET",
2345
- action,
2346
- selector
2723
+ if (target instanceof HTMLElement && target.getAttribute("role") === "slider") {
2724
+ const numericValue = Number(value);
2725
+ if (!Number.isFinite(numericValue)) {
2726
+ const guessed = guessNearbyFillTarget(target, value);
2727
+ if (guessed) {
2728
+ const guessedResult = executeFillOnResolvedTarget(guessed, value, selector, action, "heuristic-nearby-target");
2729
+ if (guessedResult) return guessedResult;
2347
2730
  }
2348
- };
2349
- if (INPUT_SET_VALUE_TYPES.has(type)) {
2350
- const finalVal = type === "color" ? value.toLowerCase().trim() : value.trim();
2351
- target.focus();
2352
- target.value = finalVal;
2353
- if (target.value !== finalVal) return {
2354
- content: `"${selector}" 填写格式不匹配(type=${type})`,
2731
+ return {
2732
+ content: `"${selector}" 为 role=slider,未找到可推断填写目标`,
2355
2733
  details: {
2356
2734
  error: true,
2357
- code: "MALFORMED_VALUE",
2735
+ code: "UNSUPPORTED_FILL_TARGET",
2358
2736
  action,
2359
2737
  selector
2360
2738
  }
2361
2739
  };
2362
- dispatchInputEvents(target);
2363
- return { content: `已填写 ${describeElement(target)}: "${finalVal}"` };
2364
2740
  }
2365
- if (type === "number" && isNaN(Number(value.trim()))) return {
2366
- content: `"${selector}" 为 input[type=number],无法填写非数字 "${value}"`,
2367
- details: {
2368
- error: true,
2369
- code: "INVALID_NUMBER",
2370
- action,
2371
- selector
2372
- }
2373
- };
2374
- scrollIntoViewIfNeeded(target);
2375
- target.focus();
2376
- selectText(target);
2377
- setNativeValue(target, value);
2378
- dispatchInputEvents(target);
2379
- if (target.value !== value) return {
2380
- content: `"${selector}" 填写后值不一致:期望 "${value}",实际 "${target.value}"`,
2741
+ const linkedInput = findAssociatedSliderInput(target);
2742
+ if (linkedInput) {
2743
+ const filled = executeFillOnResolvedTarget(linkedInput, String(numericValue), selector, action, `from ${describeElement(target)}`);
2744
+ if (filled) return filled;
2745
+ }
2746
+ const min = Number(target.getAttribute("aria-valuemin") ?? "1");
2747
+ const max = Number(target.getAttribute("aria-valuemax") ?? String(target.children.length || 5));
2748
+ const discreteCount = Number.isFinite(max - min + 1) ? Math.max(1, Math.round(max - min + 1)) : target.children.length;
2749
+ const desiredIndex = Math.round(numericValue - min);
2750
+ const children = Array.from(target.children).filter((node) => node instanceof HTMLElement);
2751
+ if (children.length >= discreteCount && desiredIndex >= 0 && desiredIndex < children.length) {
2752
+ const item = children[desiredIndex];
2753
+ scrollIntoViewIfNeeded(item);
2754
+ dispatchClickEvents(item);
2755
+ return { content: `已点击 ${describeElement(item)},设置 ${describeElement(target)} 值为 ${numericValue}` };
2756
+ }
2757
+ const guessed = guessNearbyFillTarget(target, String(numericValue));
2758
+ if (guessed) {
2759
+ const guessedResult = executeFillOnResolvedTarget(guessed, String(numericValue), selector, action, "heuristic-nearby-target");
2760
+ if (guessedResult) return guessedResult;
2761
+ }
2762
+ return {
2763
+ content: `"${selector}" 为 role=slider,但未找到可写入输入框或可点击离散子项`,
2381
2764
  details: {
2382
2765
  error: true,
2383
- code: "FILL_NOT_APPLIED",
2766
+ code: "UNSUPPORTED_FILL_TARGET",
2384
2767
  action,
2385
2768
  selector
2386
2769
  }
2387
2770
  };
2388
- return { content: `已填写 ${describeElement(target)}: "${value}"` };
2389
2771
  }
2390
- if (target instanceof HTMLTextAreaElement) {
2391
- scrollIntoViewIfNeeded(target);
2392
- target.focus();
2393
- selectText(target);
2394
- setNativeValue(target, value);
2395
- dispatchInputEvents(target);
2396
- return { content: `已填写 ${describeElement(target)}: "${value}"` };
2772
+ const directFilled = executeFillOnResolvedTarget(target, value, selector, action);
2773
+ if (directFilled) return directFilled;
2774
+ const guessed = guessNearbyFillTarget(target, value);
2775
+ if (guessed) {
2776
+ const guessedResult = executeFillOnResolvedTarget(guessed, value, selector, action, "heuristic-nearby-target");
2777
+ if (guessedResult) return guessedResult;
2397
2778
  }
2398
- if (target instanceof HTMLSelectElement) {
2399
- target.focus();
2400
- const options = Array.from(target.options);
2401
- let matched = options.find((o) => o.value === value);
2402
- if (!matched) {
2403
- const n = value.trim().toLowerCase();
2404
- matched = options.find((o) => o.text.trim().toLowerCase() === n);
2779
+ return {
2780
+ content: `"${selector}" 不是可编辑元素,且未在附近找到可推断填写目标`,
2781
+ details: {
2782
+ error: true,
2783
+ code: "UNSUPPORTED_FILL_TARGET",
2784
+ action,
2785
+ selector
2405
2786
  }
2406
- if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
2407
- target.value = matched.value;
2408
- dispatchInputEvents(target);
2409
- return { content: `已填写 ${describeElement(target)}: "${value}"` };
2410
- }
2411
- if (target instanceof HTMLElement && target.isContentEditable) {
2412
- target.focus();
2413
- selectText(target);
2414
- if (value) document.execCommand("insertText", false, value);
2415
- else document.execCommand("delete", false, void 0);
2416
- return { content: `已填写 ${describeElement(target)}: "${value}"` };
2417
- }
2418
- return { content: `"${selector}" 不是可编辑元素` };
2787
+ };
2419
2788
  }
2420
2789
  case "select_option": {
2421
2790
  const value = params.value;
@@ -2703,6 +3072,40 @@ const MAX_SNAPSHOT_ATTR_VALUE_LENGTH = 120;
2703
3072
  const MAX_EXPANDED_LIST_CHILDREN = 120;
2704
3073
  /** 定向放宽 children 的硬上限。 */
2705
3074
  const MAX_EXPANDED_CHILDREN_LIMIT = 300;
3075
/** Event name → snapshot abbreviation map (keeps snapshot token usage down). */
const EVENT_ABBREV = {
	click: "clk",
	dblclick: "dbl",
	mousedown: "mdn",
	mouseup: "mup",
	mousemove: "mmv",
	mouseover: "mov",
	mouseout: "mot",
	mouseenter: "men",
	mouseleave: "mlv",
	pointerdown: "pdn",
	pointerup: "pup",
	pointermove: "pmv",
	touchstart: "tst",
	touchend: "ted",
	keydown: "kdn",
	keyup: "kup",
	input: "inp",
	change: "chg",
	submit: "sub",
	focus: "fcs",
	blur: "blr",
	scroll: "scl",
	wheel: "whl",
	drag: "drg",
	dragstart: "drs",
	dragend: "dre",
	drop: "drp",
	contextmenu: "ctx"
};
/**
 * Abbreviate an event name for snapshot output.
 *
 * Known names use EVENT_ABBREV; any other name falls back to its first three
 * characters. Only own properties of the table are consulted, so names that
 * collide with inherited object members (e.g. "constructor", "__proto__") also
 * take the three-character fallback instead of returning a non-string value.
 *
 * @param name - event name
 * @returns the abbreviation string
 */
function abbrevEvent(name) {
	if (Object.prototype.hasOwnProperty.call(EVENT_ABBREV, name)) return EVENT_ABBREV[name];
	return name.slice(0, 3);
}
2706
3109
  /**
2707
3110
  * 规整快照属性值,避免把长 base64/data URL 原样注入快照。
2708
3111
  */
@@ -2744,7 +3147,7 @@ function sanitizeSnapshotAttrValue(value) {
2744
3147
  */
2745
3148
  function generateSnapshot(root = document.body, options = {}) {
2746
3149
  const opts = typeof options === "number" ? { maxDepth: options } : options;
2747
- const maxDepth = opts.maxDepth ?? 6;
3150
+ const maxDepth = opts.maxDepth ?? 12;
2748
3151
  const viewportOnly = opts.viewportOnly ?? true;
2749
3152
  const pruneLayout = opts.pruneLayout ?? true;
2750
3153
  const maxNodes = opts.maxNodes ?? 220;
@@ -2808,6 +3211,64 @@ function generateSnapshot(root = document.body, options = {}) {
2808
3211
  "LABEL",
2809
3212
  "SUMMARY"
2810
3213
  ]);
3214
/** Event types whose presence on an element marks it as interactive. */
const INTERACTIVE_EVENTS = new Set([
  "click",
  "dblclick",
  "mousedown",
  "mouseup",
  "pointerdown",
  "pointerup",
  "touchstart",
  "touchend",
  "input",
  "change",
  "keydown",
  "keyup",
  "submit",
  "focus",
  "blur"
]);
/** Interactive ARIA roles — the set of roles that qualify an element for a hash ID. */
const INTERACTIVE_ROLES = new Set([
  "button",
  "link",
  "tab",
  "switch",
  "slider",
  "checkbox",
  "radio",
  "combobox",
  "listbox",
  "option",
  "menuitem",
  "textbox",
  "spinbutton",
  "searchbox",
  "treeitem",
  "gridcell",
  "scrollbar"
]);
/**
 * Event priority weights (larger value = higher priority):
 * input chain (input/change/focus/blur) > keyboard > click chain
 * (click/pointer/mouse/touch) > submit.
 *
 * The table is created without a prototype because it is indexed by arbitrary
 * event names: on a plain object literal, names such as "constructor" or
 * "toString" would resolve to inherited Object.prototype functions instead of
 * being reported as missing.
 */
const EVENT_PRIORITY = Object.assign(Object.create(null), {
  input: 140,
  change: 130,
  focus: 120,
  blur: 110,
  keydown: 100,
  keyup: 90,
  click: 80,
  dblclick: 70,
  pointerdown: 60,
  pointerup: 55,
  mousedown: 50,
  mouseup: 45,
  touchstart: 40,
  touchend: 35,
  submit: 30
});
2811
3272
  /** 布尔状态属性 — 只在存在时输出(无值),如 disabled、checked */
2812
3273
  const BOOLEAN_ATTRS = [
2813
3274
  "disabled",
@@ -2856,15 +3317,75 @@ function generateSnapshot(root = document.body, options = {}) {
2856
3317
  if (el.getAttribute("id")) return false;
2857
3318
  if (el.getAttribute("role") || el.getAttribute("aria-label")) return false;
2858
3319
  for (const attr of Array.from(el.attributes)) if (attr.name.startsWith("on")) return false;
3320
+ if (hasTrackedElementEvents(el)) return false;
2859
3321
  if (directText) return false;
2860
3322
  return true;
2861
3323
  }
3324
/**
 * Report whether at least one of the events tracked for `el` (as returned by
 * getTrackedElementEvents) belongs to INTERACTIVE_EVENTS.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean} true when an interactive event is tracked, false otherwise
 *   (including when nothing is tracked at all).
 */
function hasInteractiveTrackedEvents(el) {
  for (const eventName of getTrackedElementEvents(el)) {
    if (INTERACTIVE_EVENTS.has(eventName)) return true;
  }
  return false;
}
3329
/**
 * Sum the priority weights of every event tracked for `el`.
 *
 * @param {Element} el - Element whose tracked events are scored.
 * @returns {number} Total weight; 0 when no events are tracked. Events that are
 *   not listed in EVENT_PRIORITY contribute a fallback weight of 8.
 */
function getTrackedEventPriorityScore(el) {
  let score = 0;
  for (const name of getTrackedElementEvents(el)) {
    // Read own keys only. A bare EVENT_PRIORITY[name] would return inherited
    // Object.prototype functions for names like "constructor", and adding a
    // function to `score` silently turns the total into a string.
    const weight = Object.hasOwn(EVENT_PRIORITY, name) ? EVENT_PRIORITY[name] : void 0;
    score += weight ?? 8;
  }
  return score;
}
3336
/**
 * Compute an ordering score for an element. Contributions, added in this order:
 *   1) semantic control bonus: input/textarea/select (200), button/anchor (180),
 *      or role button/switch/slider (160) — at most one of these applies;
 *   2) the tracked-event score from getTrackedEventPriorityScore;
 *   3) bonuses for inline handler attributes and for a tabindex attribute.
 *
 * @param {Element} el - Element to score.
 * @returns {number} The accumulated score.
 */
function getElementPriorityScore(el) {
  const isFormField = el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement;
  let total;
  if (isFormField) {
    total = 200;
  } else if (el instanceof HTMLButtonElement || el instanceof HTMLAnchorElement) {
    total = 180;
  } else if (["button", "switch", "slider"].includes(el.getAttribute("role"))) {
    total = 160;
  } else {
    total = 0;
  }
  total += getTrackedEventPriorityScore(el);
  // Each entry: attribute names (any one present is enough) and the bonus granted once.
  const attributeBonuses = [
    [["onclick"], 60],
    [["oninput", "onchange"], 80],
    [["onfocus", "onblur"], 70],
    [["tabindex"], 20]
  ];
  for (const [attrNames, bonus] of attributeBonuses) {
    if (attrNames.some((attrName) => el.hasAttribute(attrName))) total += bonus;
  }
  return total;
}
3354
/**
 * Return the given child elements reordered for snapshot output.
 *
 * Ordering rules, applied in sequence:
 *   - children for which isInteractiveElement is true come first;
 *   - then higher getElementPriorityScore first;
 *   - ties keep their original relative position.
 *
 * @param {Element[]} children - Child elements in document order.
 * @returns {Element[]} A new array with the same elements in priority order.
 */
function orderChildrenByPriority(children) {
  const ranked = [];
  children.forEach((child, index) => {
    const interactive = isInteractiveElement(child);
    const score = getElementPriorityScore(child);
    ranked.push({ child, index, interactive, score });
  });
  ranked.sort((left, right) => {
    if (left.interactive !== right.interactive) return left.interactive ? -1 : 1;
    if (right.score !== left.score) return right.score - left.score;
    return left.index - right.index;
  });
  return ranked.map(({ child }) => child);
}
2862
3366
/**
 * Decide whether an element counts as interactive for child ordering.
 *
 * An element qualifies when its tag is in INTERACTIVE_TAGS, when it carries any
 * of the onclick / role / tabindex / aria-label attributes (any value), or when
 * an interactive event is tracked for it.
 *
 * @param {Element} el - Element to classify.
 * @returns {boolean} true when the element is considered interactive.
 */
function isInteractiveElement(el) {
  const markerAttributes = ["onclick", "role", "tabindex", "aria-label"];
  return INTERACTIVE_TAGS.has(el.tagName)
    || markerAttributes.some((attrName) => el.hasAttribute(attrName))
    || hasInteractiveTrackedEvents(el);
}
3375
/**
 * Decide whether an element should be assigned a hash ID. Only interactive
 * nodes receive one, to save tokens.
 *
 * Primary signal: an interactive event (INTERACTIVE_EVENTS) is tracked for the element.
 * Fallback signals: any inline `on*` attribute, a semantic interactive tag,
 * an interactive ARIA role, a tabindex attribute, or being a contenteditable host.
 *
 * @param {Element} el - Element to classify.
 * @returns {boolean} true when the element should get a hash ID.
 */
function needsHashId(el) {
  if (hasInteractiveTrackedEvents(el)) return true;
  for (const attr of Array.from(el.attributes)) if (attr.name.startsWith("on")) return true;
  if (INTERACTIVE_TAGS.has(el.tagName)) return true;
  const role = el.getAttribute("role");
  if (role && INTERACTIVE_ROLES.has(role)) return true;
  if (el.hasAttribute("tabindex")) return true;
  // Only the editing host itself qualifies. isContentEditable is also true for
  // every descendant of a host, and those descendants normally have no
  // contenteditable attribute at all (getAttribute returns null), so comparing
  // against "inherit" alone would tag the whole editable subtree.
  const editableAttr = el.getAttribute("contenteditable");
  if (el.isContentEditable && editableAttr !== null && editableAttr.trim().toLowerCase() !== "inherit") return true;
  return false;
}
2870
3391
  /** 判断是否为“选项列表”容器(时间/下拉/listbox 等)。 */
@@ -2900,8 +3421,11 @@ function generateSnapshot(root = document.body, options = {}) {
2900
3421
  if (!isInViewport(el, depth)) return "";
2901
3422
  const indent = " ".repeat(depth);
2902
3423
  const tag = el.tagName.toLowerCase();
3424
+ const rawRole = el.getAttribute("role");
3425
+ const useRoleAsTag = !!(rawRole && INTERACTIVE_ROLES.has(rawRole) && rawRole !== tag);
3426
+ const displayTag = useRoleAsTag ? rawRole : tag;
2903
3427
  const currentPath = `${parentPath}/${tag}${getSiblingIndex(el)}`;
2904
- const hashId = refStore ? refStore.set(el, currentPath) : void 0;
3428
+ const hashId = refStore && needsHashId(el) ? refStore.set(el, currentPath) : void 0;
2905
3429
  const attrs = [];
2906
3430
  const elId = el.getAttribute("id");
2907
3431
  if (elId) attrs.push(`id="${elId}"`);
@@ -2911,6 +3435,7 @@ function generateSnapshot(root = document.body, options = {}) {
2911
3435
  if (cls) attrs.push(`class="${cls}"`);
2912
3436
  }
2913
3437
  for (const attr of INTERACTIVE_ATTRS) {
3438
+ if (attr === "role" && useRoleAsTag) continue;
2914
3439
  const val = el.getAttribute(attr);
2915
3440
  if (val) {
2916
3441
  const safeVal = sanitizeSnapshotAttrValue(val);
@@ -2925,6 +3450,12 @@ function generateSnapshot(root = document.body, options = {}) {
2925
3450
  if (!attrs.includes("readonly")) attrs.push("readonly");
2926
3451
  }
2927
3452
  if (el.hasAttribute("onclick")) attrs.push("onclick");
3453
+ const trackedEvents = getTrackedElementEvents(el);
3454
+ if (trackedEvents.length > 0) {
3455
+ const preview = trackedEvents.slice(0, 6).map(abbrevEvent).join(",");
3456
+ const suffix = trackedEvents.length > 6 ? ",..." : "";
3457
+ attrs.push(`listeners="${preview}${suffix}"`);
3458
+ }
2928
3459
  const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
2929
3460
  if (testId) {
2930
3461
  const safeTestId = sanitizeSnapshotAttrValue(testId).slice(0, 25);
@@ -2951,10 +3482,7 @@ function generateSnapshot(root = document.body, options = {}) {
2951
3482
  }
2952
3483
  directText = directText.trim();
2953
3484
  if (isEmptyLayoutContainer(el, directText)) {
2954
- const allChildren = Array.from(el.children);
2955
- const interactiveChildren = allChildren.filter(isInteractiveElement);
2956
- const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
2957
- const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
3485
+ const orderedChildren = orderChildrenByPriority(Array.from(el.children));
2958
3486
  const childLimit = resolveChildLimit(el, maxChildren, hashId);
2959
3487
  const selectedChildren = orderedChildren.slice(0, childLimit);
2960
3488
  const omittedChildren = orderedChildren.length - selectedChildren.length;
@@ -2965,23 +3493,19 @@ function generateSnapshot(root = document.body, options = {}) {
2965
3493
  }
2966
3494
  if (childBlocks.length === 0 && omittedChildren <= 0) return "";
2967
3495
  if (!(childBlocks.length >= 2 || omittedChildren > 0)) return childBlocks.join("\n");
2968
- const groupLines = [`${" ".repeat(depth)}([${tag}] collapsed-group`];
3496
+ const groupLines = [`${" ".repeat(depth)}([${displayTag}] collapsed-group`];
2969
3497
  for (const block of childBlocks) groupLines.push(indentMultiline(block, 1));
2970
3498
  if (omittedChildren > 0) groupLines.push(`${" ".repeat(depth + 1)}... (${omittedChildren} children omitted)`);
2971
3499
  groupLines.push(`${" ".repeat(depth)})`);
2972
3500
  return groupLines.join("\n");
2973
3501
  }
2974
- let line = `${indent}[${tag}]`;
3502
+ let line = `${indent}[${displayTag}]`;
2975
3503
  if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
2976
3504
  if (attrs.length) line += ` ${attrs.join(" ")}`;
2977
3505
  if (hashId) line += ` #${hashId}`;
2978
- else line += ` ref="${currentPath}"`;
2979
3506
  const lines = [line];
2980
3507
  emittedNodes++;
2981
- const allChildren = Array.from(el.children);
2982
- const interactiveChildren = allChildren.filter(isInteractiveElement);
2983
- const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
2984
- const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
3508
+ const orderedChildren = orderChildrenByPriority(Array.from(el.children));
2985
3509
  const childLimit = resolveChildLimit(el, maxChildren, hashId);
2986
3510
  const selectedChildren = orderedChildren.slice(0, childLimit);
2987
3511
  const omittedChildren = orderedChildren.length - selectedChildren.length;
@@ -3037,7 +3561,7 @@ function createPageInfoTool() {
3037
3561
  schema: Type.Object({
3038
3562
  action: Type.String({ description: "Info action: get_url | get_title | get_selection | get_viewport | snapshot | query_all" }),
3039
3563
  selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
3040
- maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
3564
+ maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 12)" })),
3041
3565
  viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
3042
3566
  pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" })),
3043
3567
  maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 220)" })),
@@ -3066,7 +3590,7 @@ function createPageInfoTool() {
3066
3590
  return { content: JSON.stringify(info, null, 2) };
3067
3591
  }
3068
3592
  case "snapshot": {
3069
- const maxDepth = params.maxDepth ?? 6;
3593
+ const maxDepth = params.maxDepth ?? 12;
3070
3594
  const viewportOnly = params.viewportOnly ?? true;
3071
3595
  const pruneLayout = params.pruneLayout ?? true;
3072
3596
  const maxNodes = params.maxNodes ?? 220;
@@ -3220,7 +3744,7 @@ function createNavigateTool() {
3220
3744
  * - hash selector(如 #abc123)优先通过 RefStore 解析。
3221
3745
  * - 可见性语义与 dom-tool 保持一致(参考 Playwright 风格)。
3222
3746
  */
3223
- const DEFAULT_TIMEOUT = 1e4;
3747
+ const DEFAULT_TIMEOUT = 6e3;
3224
3748
  const POLL_INTERVAL_MS = 80;
3225
3749
  const STABLE_TICK_MS = 50;
3226
3750
  const OBSERVER_OPTIONS = {
@@ -3412,7 +3936,7 @@ function createWaitTool() {
3412
3936
  selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
3413
3937
  state: Type.Optional(Type.String({ description: "Selector state for wait_for_selector: attached | visible | hidden | detached (default: attached)" })),
3414
3938
  text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
3415
- timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" })),
3939
+ timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 6000)" })),
3416
3940
  quietMs: Type.Optional(Type.Number({ description: "Quiet window for wait_for_stable in milliseconds (default: 300)" }))
3417
3941
  }),
3418
3942
  execute: async (params) => {
@@ -3749,6 +4273,7 @@ function registerToolHandler(executors) {
3749
4273
  * │ └──────────┘ └────────────┘ └──────────────┘ │
3750
4274
  * └──────────────────────────────────────────────────┘
3751
4275
  */
4276
+ installEventListenerTracking();
3752
4277
  var WebAgent = class WebAgent {
3753
4278
  /** 默认系统提示词 key(兼容旧版 setSystemPrompt(prompt))。 */
3754
4279
  static DEFAULT_SYSTEM_PROMPT_KEY = "default";
@@ -3781,6 +4306,8 @@ var WebAgent = class WebAgent {
3781
4306
  autoSnapshot;
3782
4307
  /** 快照选项 */
3783
4308
  snapshotOptions;
4309
+ /** 轮次后稳定等待配置 */
4310
+ roundStabilityWait;
3784
4311
  /** 工具注册表实例 — 每个 WebAgent 拥有独立的工具集 */
3785
4312
  registry = new ToolRegistry();
3786
4313
  /** 事件回调 — 绑定后可实时获取 Agent 进度,用于 UI 展示 */
@@ -3797,6 +4324,7 @@ var WebAgent = class WebAgent {
3797
4324
  this.memory = options.memory ?? false;
3798
4325
  this.autoSnapshot = options.autoSnapshot ?? true;
3799
4326
  this.snapshotOptions = options.snapshotOptions ?? {};
4327
+ this.roundStabilityWait = options.roundStabilityWait;
3800
4328
  if (typeof options.systemPrompt === "string") this.setSystemPrompt(options.systemPrompt);
3801
4329
  else if (options.systemPrompt && typeof options.systemPrompt === "object") this.setSystemPrompts(options.systemPrompt);
3802
4330
  }
@@ -3961,7 +4489,7 @@ var WebAgent = class WebAgent {
3961
4489
  let initialSnapshot;
3962
4490
  try {
3963
4491
  const snapshot = generateSnapshot(document.body, {
3964
- maxDepth: 8,
4492
+ maxDepth: 12,
3965
4493
  viewportOnly: false,
3966
4494
  maxNodes: 500,
3967
4495
  maxChildren: 30,
@@ -3989,6 +4517,7 @@ var WebAgent = class WebAgent {
3989
4517
  history: this.memory ? this.history : void 0,
3990
4518
  dryRun: this.dryRun,
3991
4519
  maxRounds: this.maxRounds,
4520
+ roundStabilityWait: this.roundStabilityWait,
3992
4521
  callbacks: wrappedCallbacks
3993
4522
  });
3994
4523
  if (this.memory) this.history = result.messages;