agentpage 0.0.32 → 0.0.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -10,25 +10,7 @@ const DEFAULT_MAX_ROUNDS = 40;
10
10
  const DEFAULT_RECOVERY_WAIT_MS = 100;
11
11
  const DEFAULT_ACTION_RECOVERY_ROUNDS = 2;
12
12
  const DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2;
13
- const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 1e3;
14
- const DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS = 4e3;
15
- const DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS = 200;
16
- const DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS = [
17
- ".ant-spin",
18
- ".ant-spin-spinning",
19
- ".ant-skeleton",
20
- ".el-loading-mask",
21
- ".bk-loading",
22
- ".bk-spin-loading",
23
- ".bk-skeleton",
24
- ".bk-sideslider-loading",
25
- ".t-loading",
26
- ".t-skeleton",
27
- ".t-skeleton__row",
28
- "[aria-busy=\"true\"]",
29
- ".skeleton",
30
- ".loading"
31
- ];
13
+ const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2e3;
32
14
  /** 快照起始标记 — 用于在消息中识别快照边界 */
33
15
  const SNAPSHOT_START = "<!-- SNAPSHOT_START -->";
34
16
  /** 快照结束标记 */
@@ -154,13 +136,11 @@ function deriveNextInstruction(text, currentInstruction) {
154
136
  */
155
137
  function reduceRemainingHeuristically(currentInstruction, executedCount) {
156
138
  if (!currentInstruction.trim() || executedCount <= 0) return currentInstruction;
157
- if (!/(->|=>|→|\bthen\b|\band then\b|\bnext\b|\bafter that\b|然后|接着|随后|之后|再)/i.test(currentInstruction)) return currentInstruction;
158
- const parts = currentInstruction.replace(/\s+/g, " ").replace(/(->|=>|→)/g, " 然后 ").split(/\s*(?:then|and then|next|after that|然后|接着|随后|之后|再)\s*/gi).map((part) => part.trim()).filter(Boolean);
139
+ const parts = currentInstruction.replace(/\s+/g, " ").replace(/(->|=>|→)/g, " 然后 ").replace(/[,,。;;]/g, " 然后 ").split(/\s*(?:然后|再|并且|并|接着|随后|之后)\s*/g).map((part) => part.trim()).filter(Boolean);
159
140
  if (parts.length <= 1) return currentInstruction;
160
- const consumedSteps = Math.min(Math.max(1, Math.floor(executedCount)), 1);
161
- const nextParts = parts.slice(Math.min(consumedSteps, parts.length));
141
+ const nextParts = parts.slice(Math.min(executedCount, parts.length));
162
142
  if (nextParts.length === 0) return "";
163
- return nextParts.join(" 然后 ");
143
+ return nextParts.join(" -> ");
164
144
  }
165
145
  /**
166
146
  * 判定是否强制断轮。
@@ -183,36 +163,6 @@ function shouldForceRoundBreak(toolName, toolInput) {
183
163
  return toolName === "evaluate";
184
164
  }
185
165
  /**
186
- * 判定动作是否可能引发页面结构或状态变化。
187
- *
188
- * 用于“轮次后稳定等待”触发条件:
189
- * - 命中 true:本轮结束后执行加载态 + DOM 静默双重等待
190
- * - 命中 false:跳过等待,直接进入下一轮
191
- */
192
- function isPotentialDomMutation(toolName, toolInput) {
193
- const action = getToolAction(toolInput);
194
- if (toolName === "navigate") return true;
195
- if (toolName === "evaluate") return true;
196
- if (toolName !== "dom") return false;
197
- if (!action) return false;
198
- return [
199
- "click",
200
- "fill",
201
- "select_option",
202
- "clear",
203
- "check",
204
- "uncheck",
205
- "type",
206
- "focus",
207
- "hover",
208
- "scroll",
209
- "press",
210
- "set_attr",
211
- "add_class",
212
- "remove_class"
213
- ].includes(action);
214
- }
215
- /**
216
166
  * 采集找不到元素任务。
217
167
  *
218
168
  * 返回 null 表示当前结果不属于“元素未找到”,
@@ -308,12 +258,6 @@ function hasToolError(result) {
308
258
  * - `pruneLayout=true` 时:无 id/无语义/无交互/无直接文本的布局容器会被“折叠”,
309
259
  * 子节点直接提升输出,减少无意义层级;当同一折叠容器提升出多个相邻节点时,
310
260
  * 快照会用括号分组块标记其关联来源(collapsed-group)。
311
- * - 布局主干保留:浅层结构优先保留(避免页面主骨架被过早折叠导致业务区域缺失)。
312
- * - 事件信号保留:节点自身存在事件绑定(inline/on* 或 addEventListener 追踪)时优先保留;
313
- * 中浅层会做受预算约束的子树事件探测,尽量保留潜在可操作链路。
314
- * - 语义文本保留:包含语义文本的容器优先保留,避免“有意义但非控件”信息被误删。
315
- * - 噪音过滤:跳过 `svg` 等装饰节点及 `__SVG_SPRITE_NODE__` sprite 容器,
316
- * 避免图标定义树挤占节点预算。
317
261
  * - `maxNodes`:全局节点预算,超限后停止继续遍历并追加 truncation 提示。
318
262
  * - `maxChildren`:每个父节点只保留前 N 个子元素,其余用 `... (n children omitted)` 汇总。
319
263
  * - `maxTextLength`:节点文本按长度截断,避免长段文案占满上下文。
@@ -485,10 +429,26 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
485
429
  content: `Done steps (do NOT repeat):\n${traceParts.join("\n")}`
486
430
  });
487
431
  const hasErrors = trace.some((e) => hasToolError(e.result));
488
- const needsMasterGoalAnchor = activeInstruction.trim().toLowerCase() !== userMessage.trim().toLowerCase();
489
- const contextParts = ["## Execution context"];
490
- if (needsMasterGoalAnchor) contextParts.push(`Master goal (reference only — do NOT restart from scratch):`, userMessage, "");
491
- contextParts.push("Current remaining instruction:", activeInstruction, "", "Task-reduction model:", "Input: current remaining instruction + previous round executed actions + this-round actions.", "Output: new remaining instruction after removing this-round actions.", "Start from visible page state directly. Do NOT restate task. Do NOT output planning text.", "Execute all independent visible sub-tasks in one round.", "Do NOT act on elements not present in this snapshot yet.", "If action changes DOM (open modal/navigate), stop after that batch and continue next round.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", "If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.", "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.", "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.", "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.");
432
+ const contextParts = [
433
+ "## Execution context",
434
+ "Current remaining instruction:",
435
+ activeInstruction,
436
+ "",
437
+ "Task-reduction model:",
438
+ "Input: current remaining instruction + previous round executed actions + this-round actions.",
439
+ "Output: new remaining instruction after removing this-round actions.",
440
+ "Start from visible page state directly. Do NOT restate task. Do NOT output planning text.",
441
+ "Execute all independent visible sub-tasks in one round.",
442
+ "Do NOT act on elements not present in this snapshot yet.",
443
+ "If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
444
+ "Do NOT call page_info (get_url/get_title/query_all/snapshot).",
445
+ "For dropdown/select fields, use dom with action=select_option (or fill on a select).",
446
+ "If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.",
447
+ "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.",
448
+ "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.",
449
+ "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.",
450
+ allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content."
451
+ ];
492
452
  if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
493
453
  else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
494
454
  if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
@@ -736,7 +696,7 @@ function detectIdleLoop(toolCalls, consecutiveReadOnlyRounds) {
736
696
  * - 达到 `maxRounds`
737
697
  */
738
698
  async function executeAgentLoop(params) {
739
- const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, roundStabilityWait, callbacks } = params;
699
+ const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
740
700
  const tools = registry.getDefinitions();
741
701
  const allToolCalls = [];
742
702
  const fullToolTrace = [];
@@ -757,12 +717,6 @@ async function executeAgentLoop(params) {
757
717
  let lastRoundHadError = false;
758
718
  let protocolViolationHint;
759
719
  const snapshotExpandRefIds = /* @__PURE__ */ new Set();
760
- const effectiveRoundStabilityWait = {
761
- enabled: roundStabilityWait?.enabled ?? true,
762
- timeoutMs: Math.max(200, Math.floor(roundStabilityWait?.timeoutMs ?? DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS)),
763
- quietMs: Math.max(50, Math.floor(roundStabilityWait?.quietMs ?? DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS)),
764
- loadingSelectors: [...new Set([...DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS, ...roundStabilityWait?.loadingSelectors ?? []].map((selector) => selector.trim()).filter(Boolean))]
765
- };
766
720
  let recoveryCount = 0;
767
721
  let redundantInterceptCount = 0;
768
722
  let pendingNotFoundRetry;
@@ -794,30 +748,6 @@ async function executeAgentLoop(params) {
794
748
  } : void 0);
795
749
  recordSnapshotStats(pageContext.latestSnapshot);
796
750
  };
797
- /**
798
- * 轮次后稳定等待(双重等待)。
799
- *
800
- * 顺序固定为:
801
- * 1) 等待 loading 指示器隐藏
802
- * 2) 等待 DOM quiet window
803
- */
804
- const runRoundStabilityBarrier = async () => {
805
- if (!effectiveRoundStabilityWait.enabled) return;
806
- if (!registry.has("wait")) return;
807
- const timeout = effectiveRoundStabilityWait.timeoutMs;
808
- const loadingSelector = effectiveRoundStabilityWait.loadingSelectors.join(", ");
809
- if (loadingSelector) await registry.dispatch("wait", {
810
- action: "wait_for_selector",
811
- selector: loadingSelector,
812
- state: "hidden",
813
- timeout
814
- });
815
- await registry.dispatch("wait", {
816
- action: "wait_for_stable",
817
- timeout,
818
- quietMs: effectiveRoundStabilityWait.quietMs
819
- });
820
- };
821
751
  if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
822
752
  /**
823
753
  * 追加工具轨迹。
@@ -933,7 +863,6 @@ async function executeAgentLoop(params) {
933
863
  break;
934
864
  }
935
865
  let roundHasError = false;
936
- let roundHasPotentialDomMutation = false;
937
866
  const executedTaskCalls = [];
938
867
  const roundMissingTasks = [];
939
868
  for (const tc of response.toolCalls) {
@@ -964,7 +893,6 @@ async function executeAgentLoop(params) {
964
893
  const missingTask = collectMissingTask(tc.name, tc.input, result);
965
894
  if (missingTask) roundMissingTasks.push(missingTask);
966
895
  if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
967
- if (!hasToolError(result) && isPotentialDomMutation(tc.name, tc.input)) roundHasPotentialDomMutation = true;
968
896
  if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
969
897
  pageContext.latestSnapshot = toContentString(result.content);
970
898
  recordSnapshotStats(pageContext.latestSnapshot);
@@ -980,8 +908,7 @@ async function executeAgentLoop(params) {
980
908
  else pendingNotFoundRetry = void 0;
981
909
  if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
982
910
  else {
983
- const heuristicProgressUnits = executedTaskCalls.length > 0 ? 1 : 0;
984
- const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, heuristicProgressUnits);
911
+ const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, executedTaskCalls.length);
985
912
  if (nextByHeuristic !== remainingInstruction) remainingInstruction = nextByHeuristic;
986
913
  else roundHasError = true;
987
914
  }
@@ -999,7 +926,6 @@ async function executeAgentLoop(params) {
999
926
  break;
1000
927
  }
1001
928
  consecutiveReadOnlyRounds = idleResult;
1002
- if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
1003
929
  await refreshSnapshot();
1004
930
  }
1005
931
  const resultMessages = [...history ?? [], {
@@ -1684,16 +1610,12 @@ function buildSystemPrompt(params = {}) {
1684
1610
  "- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
1685
1611
  "- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
1686
1612
  "- For dropdown/select, use dom action=select_option (or fill on select).",
1687
- "- Always cross-check planned actions against the original goal to avoid task drift (e.g., do not confuse create issue vs create repository).",
1688
1613
  "- If a required list shows `... (N children omitted)` under a specific container, request focused expansion by outputting `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>`.",
1689
1614
  "- After outputting snapshot expansion hint, wait for the next refreshed snapshot before further scrolling/clicking on that list.",
1690
1615
  "- Verification whitelist: do NOT use get_text/get_attr to verify input/select values unless the user explicitly asks for verification.",
1691
1616
  "- Stop rule: when the requested state is achieved, stop calling tools. If verification is requested, verify once and then return REMAINING: DONE (no repeated get_text/get_attr on the same target).",
1692
1617
  "- Do NOT interact with AutoPilot UI unless user explicitly asks.",
1693
1618
  "",
1694
- "## Listener Abbrevs",
1695
- "clk=click dbl=dblclick mdn=mousedown mup=mouseup mmv=mousemove mov=mouseover mot=mouseout men=mouseenter mlv=mouseleave pdn=pointerdown pup=pointerup pmv=pointermove tst=touchstart ted=touchend kdn=keydown kup=keyup inp=input chg=change sub=submit fcs=focus blr=blur scl=scroll whl=wheel drg=drag drs=dragstart dre=dragend drp=drop ctx=contextmenu",
1696
- "",
1697
1619
  "## Output Contract",
1698
1620
  "- Return tool calls for this round.",
1699
1621
  "- Also include one plain text line:",
@@ -1721,89 +1643,25 @@ function buildSystemPrompt(params = {}) {
1721
1643
  }
1722
1644
 
1723
1645
  //#endregion
1724
- //#region src/web/event-listener-tracker.ts
1725
- const elementEventMap = /* @__PURE__ */ new WeakMap();
1726
- let installed = false;
1727
- let originalAddEventListener;
1728
- let originalRemoveEventListener;
1729
- function normalizeEventType(type) {
1730
- if (typeof type !== "string") return null;
1731
- return type.trim().toLowerCase() || null;
1732
- }
1733
- function canTrackElementTarget(target) {
1734
- if (typeof Element === "undefined") return false;
1735
- return target instanceof Element;
1736
- }
1737
- function trackElementEvent(target, type) {
1738
- if (!canTrackElementTarget(target)) return;
1739
- const prev = elementEventMap.get(target);
1740
- if (prev) {
1741
- prev.add(type);
1742
- return;
1743
- }
1744
- elementEventMap.set(target, new Set([type]));
1745
- }
1746
- function untrackElementEvent(target, type) {
1747
- if (!canTrackElementTarget(target)) return;
1748
- const prev = elementEventMap.get(target);
1749
- if (!prev) return;
1750
- prev.delete(type);
1751
- if (prev.size === 0) elementEventMap.delete(target);
1752
- }
1753
- /**
1754
- * 安装全局监听追踪补丁(幂等)。
1755
- */
1756
- function installEventListenerTracking() {
1757
- if (installed) return;
1758
- if (typeof EventTarget === "undefined") return;
1759
- const proto = EventTarget.prototype;
1760
- const nativeAdd = proto.addEventListener;
1761
- const nativeRemove = proto.removeEventListener;
1762
- if (typeof nativeAdd !== "function" || typeof nativeRemove !== "function") return;
1763
- originalAddEventListener = nativeAdd;
1764
- originalRemoveEventListener = nativeRemove;
1765
- proto.addEventListener = function patchedAddEventListener(type, listener, options) {
1766
- originalAddEventListener?.call(this, type, listener, options);
1767
- try {
1768
- const normalizedType = normalizeEventType(type);
1769
- if (!normalizedType || listener == null) return;
1770
- trackElementEvent(this, normalizedType);
1771
- } catch {}
1772
- };
1773
- proto.removeEventListener = function patchedRemoveEventListener(type, listener, options) {
1774
- originalRemoveEventListener?.call(this, type, listener, options);
1775
- try {
1776
- const normalizedType = normalizeEventType(type);
1777
- if (!normalizedType || listener == null) return;
1778
- untrackElementEvent(this, normalizedType);
1779
- } catch {}
1780
- };
1781
- installed = true;
1782
- }
1783
- /**
1784
- * 读取元素已记录的事件名(排序后返回,便于稳定输出)。
1785
- */
1786
- function getTrackedElementEvents(el) {
1787
- const set = elementEventMap.get(el);
1788
- if (!set || set.size === 0) return [];
1789
- return Array.from(set).sort();
1790
- }
1791
- /**
1792
- * 判断元素是否存在至少一个被追踪到的事件绑定。
1793
- */
1794
- function hasTrackedElementEvents(el) {
1795
- return (elementEventMap.get(el)?.size ?? 0) > 0;
1796
- }
1797
-
1798
- //#endregion
1799
- //#region src/web/tools/dom-tool/constants.ts
1646
+ //#region src/web/tools/dom-tool.ts
1800
1647
  /**
1801
- * DOM Tool 常量定义。
1648
+ * DOM Tool — 浏览器 DOM 操作工具(结合 Playwright 核心交互模式增强)。
1649
+ *
1650
+ * 关键改进(参考 Playwright):
1651
+ * 1. retarget — 点击时自动重定向到 button/link/label.control
1652
+ * 2. scrollIntoView 多策略 — 4 种 block 对齐轮换,解决 sticky 遮挡
1653
+ * 3. stable 检查 — rAF 逐帧检测元素位置稳定后再操作
1654
+ * 4. hit-target 验证 — elementsFromPoint 检查是否被遮挡
1655
+ * 5. 完整点击事件链 — pointermove→pointerdown→mousedown→pointerup→mouseup→click
1656
+ * 6. check/uncheck 通过 click — 先检查→click 切换→验证状态
1657
+ * 7. press 组合键 — 支持 Control+a, Shift+Enter 等修饰键
1658
+ * 8. fill 分类型 — date/color/range 走 setValue,text 类走 selectAll+原生写入
1659
+ * 9. 自定义下拉增强 — 更广泛的 option 选择器 + 等待弹出
1660
+ * 10. ARIA disabled — 检查祖先链 aria-disabled
1802
1661
  *
1803
- * 包含:input 类型分类、修饰键集合、键码映射、滚动策略。
1662
+ * 运行环境:浏览器 Content Script(直接访问 DOM,无 CDP)。
1804
1663
  */
1805
- /** 默认等待超时(ms) */
1806
- const DEFAULT_WAIT_MS = 1200;
1664
+ const DEFAULT_WAIT_MS = 2e3;
1807
1665
  /** scrollIntoView 轮换策略(参考 Playwright dom.ts) */
1808
1666
  const SCROLL_OPTIONS = [
1809
1667
  void 0,
@@ -1863,9 +1721,6 @@ const KEY_CODE_MAP = {
1863
1721
  Alt: "AltLeft",
1864
1722
  Meta: "MetaLeft"
1865
1723
  };
1866
-
1867
- //#endregion
1868
- //#region src/web/tools/dom-tool/query.ts
1869
1724
  let activeRefStore;
1870
1725
  function setActiveRefStore(store) {
1871
1726
  activeRefStore = store;
@@ -1876,26 +1731,15 @@ function getActiveRefStore() {
1876
1731
  function sleep(ms) {
1877
1732
  return new Promise((r) => setTimeout(r, ms));
1878
1733
  }
1879
- /**
1880
- * 查询元素:优先 RefStore hash,回退 CSS 选择器。
1881
- * 支持复合 hash 选择器(如 "#hashID .child-class")——先解析 hash 根,再在其子树内 querySelector。
1882
- */
1734
+ /** 查询元素:优先 RefStore hash,回退 CSS 选择器 */
1883
1735
  function queryElement(selector) {
1884
1736
  try {
1885
1737
  if (selector.startsWith("#") && activeRefStore) {
1886
- const spaceIdx = selector.indexOf(" ");
1887
- const hashPart = spaceIdx > 0 ? selector.slice(1, spaceIdx) : selector.slice(1);
1888
- const rest = spaceIdx > 0 ? selector.slice(spaceIdx + 1).trim() : "";
1889
- if (activeRefStore.has(hashPart)) {
1890
- const root = activeRefStore.get(hashPart);
1891
- if (!root || !root.isConnected) {
1892
- activeRefStore.delete(hashPart);
1893
- return `未找到 ref "#${hashPart}" 对应的元素(可能已被移除或快照已过期)`;
1894
- }
1895
- if (!rest) return root;
1896
- const child = root.querySelector(rest);
1897
- if (!child) return `在 #${hashPart} 内未找到匹配 "${rest}" 的子元素`;
1898
- return child;
1738
+ const id = selector.slice(1);
1739
+ if (activeRefStore.has(id)) {
1740
+ const el = activeRefStore.get(id);
1741
+ if (!el) return `未找到 ref "${selector}" 对应的元素(可能已被移除或快照已过期)`;
1742
+ return el;
1899
1743
  }
1900
1744
  }
1901
1745
  const el = document.querySelector(selector);
@@ -1923,30 +1767,6 @@ function resolveWaitMs(params) {
1923
1767
  if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
1924
1768
  return DEFAULT_WAIT_MS;
1925
1769
  }
1926
- /** 生成元素的简洁描述字符串,用于工具调用结果的可读输出。 */
1927
- function describeElement(el) {
1928
- const tag = el.tagName.toLowerCase();
1929
- const id = el.id ? `#${el.id}` : "";
1930
- const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
1931
- const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
1932
- const textHint = text ? ` "${text}"` : "";
1933
- const hints = [];
1934
- for (const attr of [
1935
- "type",
1936
- "name",
1937
- "placeholder",
1938
- "href",
1939
- "role"
1940
- ]) {
1941
- const v = el.getAttribute(attr);
1942
- if (v) hints.push(`${attr}=${v}`);
1943
- }
1944
- if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
1945
- return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
1946
- }
1947
-
1948
- //#endregion
1949
- //#region src/web/tools/dom-tool/actionability.ts
1950
1770
  /** 检查元素样式可见性(处理 checkVisibility / details 折叠 / visibility) */
1951
1771
  function isStyleVisible(el, style) {
1952
1772
  style = style ?? window.getComputedStyle(el);
@@ -2027,6 +1847,23 @@ function checkElementStable(el, timeoutMs = 800) {
2027
1847
  requestAnimationFrame(check);
2028
1848
  });
2029
1849
  }
1850
+ /**
1851
+ * 将目标重定向到关联的交互控件。
1852
+ * - button-link:非交互元素→最近 button/[role=button]/a/[role=link]
1853
+ * - follow-label:label→control + 非交互→button/[role=button]/[role=checkbox]/[role=radio]
1854
+ */
1855
+ function retarget(el, mode) {
1856
+ if (mode === "none") return el;
1857
+ if (!el.matches("input, textarea, select") && !el.isContentEditable) if (mode === "button-link") el = el.closest("button, [role=button], a, [role=link]") || el;
1858
+ else el = el.closest("button, [role=button], [role=checkbox], [role=radio]") || el;
1859
+ if (mode === "follow-label") {
1860
+ if (!el.matches("a, input, textarea, button, select, [role=link], [role=button], [role=checkbox], [role=radio]") && !el.isContentEditable) {
1861
+ const label = el.closest("label");
1862
+ if (label?.control) el = label.control;
1863
+ }
1864
+ }
1865
+ return el;
1866
+ }
2030
1867
  function scrollIntoViewIfNeeded(el, retry = 0) {
2031
1868
  if (retry === 0 && "scrollIntoViewIfNeeded" in el) {
2032
1869
  el.scrollIntoViewIfNeeded(true);
@@ -2048,7 +1885,7 @@ function checkHitTarget(el) {
2048
1885
  if (topEl === el || el.contains(topEl) || topEl.contains(el)) return null;
2049
1886
  const sharedLabel = topEl.closest("label");
2050
1887
  if (sharedLabel && sharedLabel.contains(el)) return null;
2051
- return `<${topEl.tagName.toLowerCase()}${topEl.id ? `#${topEl.id}` : ""}>`;
1888
+ return describeElement(topEl);
2052
1889
  }
2053
1890
  function ensureActionable(el, action, selector, force) {
2054
1891
  if (force) return null;
@@ -2103,15 +1940,6 @@ function ensureActionable(el, action, selector, force) {
2103
1940
  };
2104
1941
  return null;
2105
1942
  }
2106
-
2107
- //#endregion
2108
- //#region src/web/tools/dom-tool/events.ts
2109
- /**
2110
- * DOM Tool — 事件派发与键盘操作。
2111
- *
2112
- * 包含:完整点击事件链、hover 事件链、input/change 派发、
2113
- * 原生 setter 写入、selectText、组合键 press。
2114
- */
2115
1943
  function getClickPoint(el) {
2116
1944
  const r = el.getBoundingClientRect();
2117
1945
  return {
@@ -2120,7 +1948,7 @@ function getClickPoint(el) {
2120
1948
  };
2121
1949
  }
2122
1950
  /**
2123
- * 完整点击事件链:
1951
+ * 完整点击事件链(参考 Playwright Mouse.click):
2124
1952
  * pointermove → mousemove → (per clickCount) pointerdown → mousedown → focus → pointerup → mouseup → click
2125
1953
  */
2126
1954
  function dispatchClickEvents(el, clickCount = 1) {
@@ -2288,31 +2116,25 @@ function executePress(el, key) {
2288
2116
  ...modState
2289
2117
  }));
2290
2118
  }
2291
-
2292
- //#endregion
2293
- //#region src/web/tools/dom-tool/resolve.ts
2294
- /**
2295
- * DOM Tool 目标解析与归一化。
2296
- *
2297
- * 包含:retarget、checkable 目标归一化、pointer action 代理、
2298
- * 表单项控件重定向、editable 穿透。
2299
- */
2300
- /**
2301
- * 将目标重定向到关联的交互控件。
2302
- * - button-link:非交互元素→最近 button/[role=button]/a/[role=link]
2303
- * - follow-label:label→control + 非交互→button/[role=button]/[role=checkbox]/[role=radio]
2304
- */
2305
- function retarget(el, mode) {
2306
- if (mode === "none") return el;
2307
- if (!el.matches("input, textarea, select") && !el.isContentEditable) if (mode === "button-link") el = el.closest("button, [role=button], a, [role=link]") || el;
2308
- else el = el.closest("button, [role=button], [role=checkbox], [role=radio]") || el;
2309
- if (mode === "follow-label") {
2310
- if (!el.matches("a, input, textarea, button, select, [role=link], [role=button], [role=checkbox], [role=radio]") && !el.isContentEditable) {
2311
- const label = el.closest("label");
2312
- if (label?.control) el = label.control;
2313
- }
2119
+ function describeElement(el) {
2120
+ const tag = el.tagName.toLowerCase();
2121
+ const id = el.id ? `#${el.id}` : "";
2122
+ const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
2123
+ const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
2124
+ const textHint = text ? ` "${text}"` : "";
2125
+ const hints = [];
2126
+ for (const attr of [
2127
+ "type",
2128
+ "name",
2129
+ "placeholder",
2130
+ "href",
2131
+ "role"
2132
+ ]) {
2133
+ const v = el.getAttribute(attr);
2134
+ if (v) hints.push(`${attr}=${v}`);
2314
2135
  }
2315
- return el;
2136
+ if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
2137
+ return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
2316
2138
  }
2317
2139
  function getChecked(el) {
2318
2140
  if (el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio")) return el.checked;
@@ -2359,30 +2181,6 @@ function resolvePointerActionTarget(el) {
2359
2181
  return el;
2360
2182
  }
2361
2183
  /**
2362
- * 点击目标上卷:当命中文本/装饰子节点时,优先上卷到最近可点击祖先。
2363
- *
2364
- * 典型场景:
2365
- * - 列表项文本本身无 click,但父级容器(如 .g-pointer)有点击语义
2366
- * - 事件委托绑定在祖先,子节点点击命中不稳定
2367
- */
2368
- function resolveClickableAncestorTarget(el) {
2369
- const isSelfClickable = (node) => {
2370
- if (node.matches("a, button, input, textarea, select, summary, [role=button], [role=link], [role=menuitem]")) return true;
2371
- if (node.hasAttribute("onclick")) return true;
2372
- const tabIndexAttr = node.getAttribute("tabindex");
2373
- if (tabIndexAttr !== null && tabIndexAttr !== "-1") return true;
2374
- if (getTrackedElementEvents(node).some((name) => name === "click" || name === "pointerdown" || name === "mousedown")) return true;
2375
- return false;
2376
- };
2377
- if (isSelfClickable(el)) return el;
2378
- let ancestor = el.parentElement;
2379
- for (let depth = 0; ancestor && depth < 6; depth++, ancestor = ancestor.parentElement) {
2380
- if (!isElementVisible(ancestor)) continue;
2381
- if (isSelfClickable(ancestor)) return ancestor;
2382
- }
2383
- return el;
2384
- }
2385
- /**
2386
2184
  * 当命中表单项说明 label(如 Element Plus el-form-item__label)时,
2387
2185
  * 自动重定向到同一表单项中的首个可交互控件。
2388
2186
  */
@@ -2397,35 +2195,6 @@ function resolveFormItemControlTarget(el) {
2397
2195
  if (control && isElementVisible(control)) return control;
2398
2196
  return el;
2399
2197
  }
2400
- /**
2401
- * 穿透包裹容器,查找内部可编辑子元素。
2402
- * 覆盖 UI 框架常见模式:wrapper div 包裹真实 input/textarea。
2403
- * 若自身已可编辑则直接返回;否则在子树中搜索第一个可编辑且可见的控件。
2404
- * 对 role=slider/spinbutton 等 ARIA widget:向上逐级查找最近容器中的关联 input。
2405
- */
2406
- function resolveEditableTarget(el) {
2407
- if (isEditableElement(el)) return el;
2408
- const inner = el.querySelector("input:not([type=\"hidden\"]), textarea, select, [contenteditable=\"true\"]");
2409
- if (inner && isEditableElement(inner) && isElementVisible(inner)) return inner;
2410
- const role = el.getAttribute("role");
2411
- if (role === "slider" || role === "spinbutton") {
2412
- let ancestor = el.parentElement;
2413
- for (let depth = 0; ancestor && depth < 5; depth++, ancestor = ancestor.parentElement) {
2414
- const input = ancestor.querySelector("input[type=\"number\"], input[role=\"spinbutton\"], input:not([type=\"hidden\"])");
2415
- if (input instanceof HTMLInputElement && isEditableElement(input) && isElementVisible(input)) return input;
2416
- }
2417
- }
2418
- return el;
2419
- }
2420
-
2421
- //#endregion
2422
- //#region src/web/tools/dom-tool/dropdown.ts
2423
- /**
2424
- * DOM Tool — 自定义下拉增强。
2425
- *
2426
- * 包含:全局可见 option 查找、下拉弹出等待。
2427
- */
2428
- /** 在全局可见 option 节点中按文本匹配(精确 → 包含) */
2429
2198
  function findVisibleOptionByText(text) {
2430
2199
  const target = text.trim().toLowerCase();
2431
2200
  if (!target) return null;
@@ -2446,7 +2215,6 @@ function findVisibleOptionByText(text) {
2446
2215
  for (const n of visible) if (n.textContent?.trim().toLowerCase().includes(target)) return n;
2447
2216
  return null;
2448
2217
  }
2449
- /** 轮询等待下拉弹出层出现 */
2450
2218
  async function waitForDropdownPopup(maxWait = 500) {
2451
2219
  const start = Date.now();
2452
2220
  while (Date.now() - start < maxWait) {
@@ -2455,33 +2223,22 @@ async function waitForDropdownPopup(maxWait = 500) {
2455
2223
  await sleep(50);
2456
2224
  }
2457
2225
  }
2458
-
2459
- //#endregion
2460
- //#region src/web/tools/dom-tool/index.ts
2461
- /**
2462
- * DOM Tool — 浏览器 DOM 操作工具入口(结合 Playwright 核心交互模式增强)。
2463
- *
2464
- * 关键能力:
2465
- * 1. retarget — 点击时自动重定向到 button/link/label.control
2466
- * 2. scrollIntoView 多策略 — 4 种 block 对齐轮换,解决 sticky 遮挡
2467
- * 3. stable 检查 — rAF 逐帧检测元素位置稳定后再操作
2468
- * 4. hit-target 验证 — elementsFromPoint 检查是否被遮挡
2469
- * 5. 完整点击事件链 — pointermove→pointerdown→mousedown→pointerup→mouseup→click
2470
- * 6. check/uncheck 通过 click — 先检查→click 切换→验证状态
2471
- * 7. press 组合键 — 支持 Control+a, Shift+Enter 等修饰键
2472
- * 8. fill 分类型 — date/color/range 走 setValue,text 类走 selectAll+原生写入
2473
- * 9. 自定义下拉增强 — 更广泛的 option 选择器 + 等待弹出
2474
- * 10. ARIA disabled — 检查祖先链 aria-disabled
2475
- *
2476
- * 运行环境:浏览器 Content Script(直接访问 DOM,无 CDP)。
2477
- */
2478
2226
  function createDomTool() {
2479
2227
  return {
2480
2228
  name: "dom",
2481
2229
  description: [
2482
2230
  "Perform DOM operations on the current page.",
2483
2231
  "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, scroll, press, get_text, get_attr, set_attr, add_class, remove_class.",
2484
- "fill auto-resolves wrapper inner input. check/uncheck toggles via click. press supports combos (Control+a). scroll supports steps for repeated scrolling."
2232
+ "Input/Select rule: before each fill/type/select_option, click or focus the same target immediately in the same round.",
2233
+ "For multiple fields, use alternating pairs in one batch: focus/click A -> fill/type A -> focus/click B -> fill/type B.",
2234
+ "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector.",
2235
+ "press supports combo keys like 'Control+a', 'Shift+Enter'.",
2236
+ "check/uncheck is done via click — state change is verified after action.",
2237
+ "Ordinal/index rule: treat visual order as 1-based when the instruction says 'the Nth item' (e.g. 4th star = 4th visible icon from left to right), and avoid off-by-one mistakes.",
2238
+ "Disambiguation rule: distinguish descriptive text/labels from actionable options. Do not click nearby label/help text; click the actual interactive option/control item (icon/button/option) that changes state.",
2239
+ "Unknown/complex components: if a container element (e.g. role=slider, rating, custom widget) has multiple child icons/items in the snapshot but you don't know how to operate it directly, try clicking the appropriate child element instead. For example, a rating component with 5 star icon children — click the 4th icon child to set 4 stars. A slider with a runway — clicking the runway at the right position may work. Always prefer interacting with visible children when the parent container doesn't respond to fill/click as expected.",
2240
+ "fill supports role=slider elements: use fill with a numeric value on a role=slider container (rating/slider) to set its value programmatically.",
2241
+ "For wheel/virtualized pickers where target option is not visible yet, use scroll on the picker column first, then click/select the newly visible option. scroll supports steps for repeated scrolling in one call."
2485
2242
  ].join(" "),
2486
2243
  schema: Type.Object({
2487
2244
  action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | scroll | press | get_text | get_attr | set_attr | add_class | remove_class." }),
@@ -2496,7 +2253,7 @@ function createDomTool() {
2496
2253
  deltaY: Type.Optional(Type.Number({ description: "Vertical scroll delta for scroll action. Positive = down, negative = up." })),
2497
2254
  deltaX: Type.Optional(Type.Number({ description: "Horizontal scroll delta for scroll action." })),
2498
2255
  steps: Type.Optional(Type.Number({ description: "Repeat count for scroll action (default 1, max 20)." })),
2499
- waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 1200)." })),
2256
+ waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 2000)." })),
2500
2257
  waitSeconds: Type.Optional(Type.Number({ description: "Wait timeout in seconds (fallback for waitMs)." })),
2501
2258
  force: Type.Optional(Type.Boolean({ description: "Skip actionability checks (default false)." }))
2502
2259
  }),
@@ -2544,18 +2301,13 @@ function createDomTool() {
2544
2301
  el = r;
2545
2302
  }
2546
2303
  if (action === "check" || action === "uncheck") el = resolveCheckableTarget(el);
2547
- if ([
2548
- "fill",
2549
- "type",
2550
- "clear"
2551
- ].includes(action)) el = resolveEditableTarget(retarget(el, "follow-label"));
2552
- const actionabilityTarget = action === "click" || action === "check" || action === "uncheck" ? resolvePointerActionTarget(resolveClickableAncestorTarget(resolveFormItemControlTarget(el))) : el;
2304
+ const actionabilityTarget = action === "click" || action === "check" || action === "uncheck" ? resolvePointerActionTarget(resolveFormItemControlTarget(el)) : el;
2553
2305
  try {
2554
2306
  const checkResult = ensureActionable(actionabilityTarget, action, selector, force);
2555
2307
  if (checkResult) return checkResult;
2556
2308
  switch (action) {
2557
2309
  case "click": {
2558
- const target = resolvePointerActionTarget(resolveClickableAncestorTarget(resolveFormItemControlTarget(retarget(el, force ? "none" : "button-link"))));
2310
+ const target = resolvePointerActionTarget(resolveFormItemControlTarget(retarget(el, force ? "none" : "button-link")));
2559
2311
  const clickCount = typeof params.clickCount === "number" ? params.clickCount : 1;
2560
2312
  if (target instanceof HTMLOptionElement) {
2561
2313
  const parent = target.parentElement;
@@ -2582,7 +2334,7 @@ function createDomTool() {
2582
2334
  case "fill": {
2583
2335
  const value = params.value;
2584
2336
  if (value === void 0) return { content: "缺少 value 参数" };
2585
- const target = el;
2337
+ const target = retarget(el, "follow-label");
2586
2338
  if (target instanceof HTMLInputElement) {
2587
2339
  const type = target.type.toLowerCase();
2588
2340
  if (INPUT_BLOCKED_TYPES.has(type)) return {
@@ -2725,7 +2477,7 @@ function createDomTool() {
2725
2477
  return { content: `已选择 ${describeElement(target)}: value="${selected.value}", label="${selected.text.trim()}"` };
2726
2478
  }
2727
2479
  case "clear": {
2728
- const target = el;
2480
+ const target = retarget(el, "follow-label");
2729
2481
  if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
2730
2482
  scrollIntoViewIfNeeded(target);
2731
2483
  target.focus();
@@ -2785,7 +2537,7 @@ function createDomTool() {
2785
2537
  case "type": {
2786
2538
  const value = params.value;
2787
2539
  if (value === void 0) return { content: "缺少 value 参数" };
2788
- const target = el;
2540
+ const target = retarget(el, "follow-label");
2789
2541
  scrollIntoViewIfNeeded(target);
2790
2542
  if (target instanceof HTMLElement) target.focus();
2791
2543
  for (const char of value) {
@@ -2952,50 +2704,6 @@ const MAX_EXPANDED_LIST_CHILDREN = 120;
2952
2704
  /** 定向放宽 children 的硬上限。 */
2953
2705
  const MAX_EXPANDED_CHILDREN_LIMIT = 300;
2954
2706
  /**
2955
- * 事件名 → 快照简写映射。
2956
- * 目的:大幅压缩 listeners="..." 占用的 token,同时保留可读性。
2957
- * 简写规则在 system-prompt 中向模型说明。
2958
- */
2959
- const EVENT_ABBREV = {
2960
- click: "clk",
2961
- dblclick: "dbl",
2962
- mousedown: "mdn",
2963
- mouseup: "mup",
2964
- mousemove: "mmv",
2965
- mouseover: "mov",
2966
- mouseout: "mot",
2967
- mouseenter: "men",
2968
- mouseleave: "mlv",
2969
- pointerdown: "pdn",
2970
- pointerup: "pup",
2971
- pointermove: "pmv",
2972
- pointerenter: "pen",
2973
- pointerleave: "plv",
2974
- touchstart: "tst",
2975
- touchend: "ted",
2976
- touchmove: "tmv",
2977
- keydown: "kdn",
2978
- keyup: "kup",
2979
- keypress: "kpr",
2980
- input: "inp",
2981
- change: "chg",
2982
- submit: "sub",
2983
- focus: "fcs",
2984
- blur: "blr",
2985
- scroll: "scl",
2986
- wheel: "whl",
2987
- drag: "drg",
2988
- dragstart: "drs",
2989
- dragend: "dre",
2990
- drop: "drp",
2991
- contextmenu: "ctx",
2992
- resize: "rsz"
2993
- };
2994
- /** 将完整事件名转为快照简写(未收录的取前 3 字符)。 */
2995
- function abbrevEvent(name) {
2996
- return EVENT_ABBREV[name] ?? name.slice(0, 3);
2997
- }
2998
- /**
2999
2707
  * 规整快照属性值,避免把长 base64/data URL 原样注入快照。
3000
2708
  */
3001
2709
  function sanitizeSnapshotAttrValue(value) {
@@ -3036,18 +2744,17 @@ function sanitizeSnapshotAttrValue(value) {
3036
2744
  */
3037
2745
  function generateSnapshot(root = document.body, options = {}) {
3038
2746
  const opts = typeof options === "number" ? { maxDepth: options } : options;
3039
- const maxDepth = opts.maxDepth ?? 7;
2747
+ const maxDepth = opts.maxDepth ?? 6;
3040
2748
  const viewportOnly = opts.viewportOnly ?? true;
3041
2749
  const pruneLayout = opts.pruneLayout ?? true;
3042
- const maxNodes = opts.maxNodes ?? 280;
3043
- const maxChildren = opts.maxChildren ?? 32;
2750
+ const maxNodes = opts.maxNodes ?? 220;
2751
+ const maxChildren = opts.maxChildren ?? 25;
3044
2752
  const maxTextLength = opts.maxTextLength ?? 40;
3045
2753
  const expandOptionLists = opts.expandOptionLists ?? false;
3046
2754
  const expandedChildrenLimit = Math.min(MAX_EXPANDED_CHILDREN_LIMIT, Math.max(1, opts.expandedChildrenLimit ?? MAX_EXPANDED_LIST_CHILDREN));
3047
2755
  const expandChildrenRefSet = new Set((opts.expandChildrenRefs ?? []).map((ref) => ref.trim().replace(/^#/, "")).filter(Boolean));
3048
2756
  let emittedNodes = 0;
3049
2757
  let truncatedByNodeBudget = false;
3050
- const emittedRefIds = /* @__PURE__ */ new Set();
3051
2758
  const refStore = opts.refStore;
3052
2759
  const SKIP_TAGS = new Set([
3053
2760
  "SCRIPT",
@@ -3083,11 +2790,7 @@ function generateSnapshot(root = document.body, options = {}) {
3083
2790
  "value",
3084
2791
  "name",
3085
2792
  "role",
3086
- "tabindex",
3087
2793
  "aria-label",
3088
- "aria-valuenow",
3089
- "aria-valuemin",
3090
- "aria-valuemax",
3091
2794
  "src",
3092
2795
  "alt",
3093
2796
  "title",
@@ -3105,25 +2808,6 @@ function generateSnapshot(root = document.body, options = {}) {
3105
2808
  "LABEL",
3106
2809
  "SUMMARY"
3107
2810
  ]);
3108
- /** 常见可交互事件(用于提升元素交互优先级)。 */
3109
- const INTERACTIVE_EVENTS = new Set([
3110
- "click",
3111
- "dblclick",
3112
- "mousedown",
3113
- "mouseup",
3114
- "pointerdown",
3115
- "pointerup",
3116
- "touchstart",
3117
- "touchend",
3118
- "input",
3119
- "change",
3120
- "keydown",
3121
- "keyup",
3122
- "keypress",
3123
- "submit",
3124
- "focus",
3125
- "blur"
3126
- ]);
3127
2811
  /** 布尔状态属性 — 只在存在时输出(无值),如 disabled、checked */
3128
2812
  const BOOLEAN_ATTRS = [
3129
2813
  "disabled",
@@ -3158,70 +2842,6 @@ function generateSnapshot(root = document.body, options = {}) {
3158
2842
  if (rect.width === 0 && rect.height === 0) return false;
3159
2843
  return true;
3160
2844
  }
3161
- /** 统一标签名键值(HTML/SVG 在不同环境可能大小写不一致)。 */
3162
- function getTagKey(el) {
3163
- return (el.tagName || "").toUpperCase();
3164
- }
3165
- /** 判断元素是否存在绑定事件(inline 或 addEventListener 追踪)。 */
3166
- function hasBoundEvents(el) {
3167
- if (hasTrackedElementEvents(el)) return true;
3168
- for (const attr of Array.from(el.attributes)) if (attr.name.startsWith("on")) return true;
3169
- return false;
3170
- }
3171
- /**
3172
- * 轻量检测:当前容器浅层子树里是否出现事件绑定节点。
3173
- * 仅用于是否保留布局容器,预算受控避免再次吞掉整页层级。
3174
- */
3175
- function hasBoundEventsInShallowSubtree(el, scanBudget = 48, maxTreeDepth = 2) {
3176
- const queue = Array.from(el.children).map((node) => ({
3177
- node,
3178
- depth: 1
3179
- }));
3180
- let scanned = 0;
3181
- while (queue.length > 0) {
3182
- const current = queue.shift();
3183
- if (!current) continue;
3184
- if (hasBoundEvents(current.node)) return true;
3185
- scanned += 1;
3186
- if (scanned >= scanBudget) return false;
3187
- if (current.depth >= maxTreeDepth) continue;
3188
- for (const child of Array.from(current.node.children)) queue.push({
3189
- node: child,
3190
- depth: current.depth + 1
3191
- });
3192
- }
3193
- return false;
3194
- }
3195
- /** 判断文本是否具有语义信息(过滤纯符号/超短噪音)。 */
3196
- function isSemanticText(text) {
3197
- const normalized = text.replace(/\s+/g, "").trim();
3198
- if (!normalized) return false;
3199
- if (normalized.length < 2) return false;
3200
- return /[\p{Script=Han}A-Za-z0-9]/u.test(normalized);
3201
- }
3202
- /** 在子树内查找语义文本(浅层限流,避免额外大开销)。 */
3203
- function hasSemanticTextInSubtree(el, scanBudget = 180) {
3204
- const stack = Array.from(el.children);
3205
- let scanned = 0;
3206
- while (stack.length > 0) {
3207
- const current = stack.pop();
3208
- if (!current) continue;
3209
- let directText = "";
3210
- for (let i = 0; i < current.childNodes.length; i++) {
3211
- const node = current.childNodes[i];
3212
- if (node.nodeType === Node.TEXT_NODE) {
3213
- const t = node.textContent?.trim();
3214
- if (t) directText += t + " ";
3215
- }
3216
- }
3217
- if (isSemanticText(directText.trim())) return true;
3218
- scanned += 1;
3219
- if (scanned >= scanBudget) return false;
3220
- const children = Array.from(current.children);
3221
- for (let i = children.length - 1; i >= 0; i--) stack.push(children[i]);
3222
- }
3223
- return false;
3224
- }
3225
2845
  /**
3226
2846
  * 判断元素是否为「无意义布局容器」(智能剪枝候选)。
3227
2847
  * 满足所有条件时返回 true:
@@ -3230,30 +2850,21 @@ function generateSnapshot(root = document.body, options = {}) {
3230
2850
  * 3. 没有交互属性(href/role/aria-label/onclick 等)
3231
2851
  * 4. 没有直接文本内容
3232
2852
  */
3233
- function isEmptyLayoutContainer(el, directText, depth) {
2853
+ function isEmptyLayoutContainer(el, directText) {
3234
2854
  if (!pruneLayout) return false;
3235
- if (depth <= 2) return false;
3236
- if (!LAYOUT_TAGS.has(getTagKey(el))) return false;
2855
+ if (!LAYOUT_TAGS.has(el.tagName)) return false;
3237
2856
  if (el.getAttribute("id")) return false;
3238
2857
  if (el.getAttribute("role") || el.getAttribute("aria-label")) return false;
3239
- if (hasBoundEvents(el)) return false;
3240
- if (depth <= 4 && hasBoundEventsInShallowSubtree(el)) return false;
3241
- if (isSemanticText(directText) || hasSemanticTextInSubtree(el)) return false;
2858
+ for (const attr of Array.from(el.attributes)) if (attr.name.startsWith("on")) return false;
3242
2859
  if (directText) return false;
3243
2860
  return true;
3244
2861
  }
3245
- function hasInteractiveTrackedEvents(el) {
3246
- const trackedEvents = getTrackedElementEvents(el);
3247
- if (trackedEvents.length === 0) return false;
3248
- return trackedEvents.some((eventName) => INTERACTIVE_EVENTS.has(eventName));
3249
- }
3250
2862
  function isInteractiveElement(el) {
3251
- if (INTERACTIVE_TAGS.has(getTagKey(el))) return true;
2863
+ if (INTERACTIVE_TAGS.has(el.tagName)) return true;
3252
2864
  if (el.hasAttribute("onclick")) return true;
3253
2865
  if (el.hasAttribute("role")) return true;
3254
2866
  if (el.hasAttribute("tabindex")) return true;
3255
2867
  if (el.hasAttribute("aria-label")) return true;
3256
- if (hasInteractiveTrackedEvents(el)) return true;
3257
2868
  return false;
3258
2869
  }
3259
2870
  /** 判断是否为“选项列表”容器(时间/下拉/listbox 等)。 */
@@ -3282,9 +2893,7 @@ function generateSnapshot(root = document.body, options = {}) {
3282
2893
  return "";
3283
2894
  }
3284
2895
  if (depth > maxDepth) return "";
3285
- const tagKey = getTagKey(el);
3286
- if (SKIP_TAGS.has(tagKey)) return "";
3287
- if (el.getAttribute("id") === "__SVG_SPRITE_NODE__") return "";
2896
+ if (SKIP_TAGS.has(el.tagName)) return "";
3288
2897
  if (el.hasAttribute("data-autopilot-ignore")) return "";
3289
2898
  const style = window.getComputedStyle(el);
3290
2899
  if (style.display === "none" || style.visibility === "hidden") return "";
@@ -3316,12 +2925,6 @@ function generateSnapshot(root = document.body, options = {}) {
3316
2925
  if (!attrs.includes("readonly")) attrs.push("readonly");
3317
2926
  }
3318
2927
  if (el.hasAttribute("onclick")) attrs.push("onclick");
3319
- const trackedEvents = getTrackedElementEvents(el);
3320
- if (trackedEvents.length > 0) {
3321
- const preview = trackedEvents.slice(0, 6).map(abbrevEvent).join(",");
3322
- const suffix = trackedEvents.length > 6 ? ",..." : "";
3323
- attrs.push(`listeners="${preview}${suffix}"`);
3324
- }
3325
2928
  const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
3326
2929
  if (testId) {
3327
2930
  const safeTestId = sanitizeSnapshotAttrValue(testId).slice(0, 25);
@@ -3347,7 +2950,7 @@ function generateSnapshot(root = document.body, options = {}) {
3347
2950
  }
3348
2951
  }
3349
2952
  directText = directText.trim();
3350
- if (isEmptyLayoutContainer(el, directText, depth)) {
2953
+ if (isEmptyLayoutContainer(el, directText)) {
3351
2954
  const allChildren = Array.from(el.children);
3352
2955
  const interactiveChildren = allChildren.filter(isInteractiveElement);
3353
2956
  const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
@@ -3371,10 +2974,8 @@ function generateSnapshot(root = document.body, options = {}) {
3371
2974
  let line = `${indent}[${tag}]`;
3372
2975
  if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
3373
2976
  if (attrs.length) line += ` ${attrs.join(" ")}`;
3374
- if (hashId) {
3375
- line += ` #${hashId}`;
3376
- emittedRefIds.add(hashId);
3377
- } else line += ` ref="${currentPath}"`;
2977
+ if (hashId) line += ` #${hashId}`;
2978
+ else line += ` ref="${currentPath}"`;
3378
2979
  const lines = [line];
3379
2980
  emittedNodes++;
3380
2981
  const allChildren = Array.from(el.children);
@@ -3392,7 +2993,6 @@ function generateSnapshot(root = document.body, options = {}) {
3392
2993
  return lines.join("\n");
3393
2994
  }
3394
2995
  const output = walk(root, 0, "") || "(空页面)";
3395
- refStore?.prune(emittedRefIds);
3396
2996
  if (!truncatedByNodeBudget) return output;
3397
2997
  return `${output}\n... (snapshot truncated: maxNodes=${maxNodes})`;
3398
2998
  }
@@ -3437,11 +3037,11 @@ function createPageInfoTool() {
3437
3037
  schema: Type.Object({
3438
3038
  action: Type.String({ description: "Info action: get_url | get_title | get_selection | get_viewport | snapshot | query_all" }),
3439
3039
  selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
3440
- maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 7)" })),
3040
+ maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
3441
3041
  viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
3442
3042
  pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" })),
3443
- maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 280)" })),
3444
- maxChildren: Type.Optional(Type.Number({ description: "Maximum children per element (default: 32)" })),
3043
+ maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 220)" })),
3044
+ maxChildren: Type.Optional(Type.Number({ description: "Maximum children per element (default: 25)" })),
3445
3045
  maxTextLength: Type.Optional(Type.Number({ description: "Maximum text length per node (default: 40)" })),
3446
3046
  expandOptionLists: Type.Optional(Type.Boolean({ description: "Expand option-list containers to avoid child truncation (default: false)" })),
3447
3047
  expandChildrenRefs: Type.Optional(Type.Array(Type.String({ description: "Hash refs to expand child truncation for (e.g. #abc123)" }))),
@@ -3466,11 +3066,11 @@ function createPageInfoTool() {
3466
3066
  return { content: JSON.stringify(info, null, 2) };
3467
3067
  }
3468
3068
  case "snapshot": {
3469
- const maxDepth = params.maxDepth ?? 7;
3069
+ const maxDepth = params.maxDepth ?? 6;
3470
3070
  const viewportOnly = params.viewportOnly ?? true;
3471
3071
  const pruneLayout = params.pruneLayout ?? true;
3472
- const maxNodes = params.maxNodes ?? 280;
3473
- const maxChildren = params.maxChildren ?? 32;
3072
+ const maxNodes = params.maxNodes ?? 220;
3073
+ const maxChildren = params.maxChildren ?? 25;
3474
3074
  const maxTextLength = params.maxTextLength ?? 40;
3475
3075
  const expandOptionLists = params.expandOptionLists ?? false;
3476
3076
  const expandChildrenRefs = Array.isArray(params.expandChildrenRefs) ? params.expandChildrenRefs.filter((ref) => typeof ref === "string") : void 0;
@@ -3620,7 +3220,7 @@ function createNavigateTool() {
3620
3220
  * - hash selector(如 #abc123)优先通过 RefStore 解析。
3621
3221
  * - 可见性语义与 dom-tool 保持一致(参考 Playwright 风格)。
3622
3222
  */
3623
- const DEFAULT_TIMEOUT = 6e3;
3223
+ const DEFAULT_TIMEOUT = 1e4;
3624
3224
  const POLL_INTERVAL_MS = 80;
3625
3225
  const STABLE_TICK_MS = 50;
3626
3226
  const OBSERVER_OPTIONS = {
@@ -3674,14 +3274,7 @@ function resolveSelector(selector) {
3674
3274
  const store = getActiveRefStore();
3675
3275
  if (store) {
3676
3276
  const id = selector.slice(1);
3677
- if (store.has(id)) {
3678
- const el = store.get(id);
3679
- if (!el || !el.isConnected) {
3680
- store.delete(id);
3681
- return null;
3682
- }
3683
- return el;
3684
- }
3277
+ if (store.has(id)) return store.get(id) ?? null;
3685
3278
  }
3686
3279
  }
3687
3280
  try {
@@ -3819,7 +3412,7 @@ function createWaitTool() {
3819
3412
  selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
3820
3413
  state: Type.Optional(Type.String({ description: "Selector state for wait_for_selector: attached | visible | hidden | detached (default: attached)" })),
3821
3414
  text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
3822
- timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 6000)" })),
3415
+ timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" })),
3823
3416
  quietMs: Type.Optional(Type.Number({ description: "Quiet window for wait_for_stable in milliseconds (default: 300)" }))
3824
3417
  }),
3825
3418
  execute: async (params) => {
@@ -4025,29 +3618,6 @@ var RefStore = class {
4025
3618
  has(id) {
4026
3619
  return this.map.has(id);
4027
3620
  }
4028
- /** 删除指定 hash ID 映射,返回是否删除成功。 */
4029
- delete(id) {
4030
- return this.map.delete(id);
4031
- }
4032
- /**
4033
- * 清理失效引用:
4034
- * - 仅保留 keepIds 中的映射(若提供)
4035
- * - 自动移除已脱离文档(isConnected=false)的元素
4036
- *
4037
- * @returns 被移除的映射数量
4038
- */
4039
- prune(keepIds) {
4040
- let removed = 0;
4041
- for (const [id, el] of this.map.entries()) {
4042
- const shouldKeepById = keepIds ? keepIds.has(id) : true;
4043
- const isConnected = el.isConnected;
4044
- if (!shouldKeepById || !isConnected) {
4045
- this.map.delete(id);
4046
- removed++;
4047
- }
4048
- }
4049
- return removed;
4050
- }
4051
3621
  /** 清空所有映射 */
4052
3622
  clear() {
4053
3623
  this.map.clear();
@@ -4179,7 +3749,6 @@ function registerToolHandler(executors) {
4179
3749
  * │ └──────────┘ └────────────┘ └──────────────┘ │
4180
3750
  * └──────────────────────────────────────────────────┘
4181
3751
  */
4182
- installEventListenerTracking();
4183
3752
  var WebAgent = class WebAgent {
4184
3753
  /** 默认系统提示词 key(兼容旧版 setSystemPrompt(prompt))。 */
4185
3754
  static DEFAULT_SYSTEM_PROMPT_KEY = "default";
@@ -4212,8 +3781,6 @@ var WebAgent = class WebAgent {
4212
3781
  autoSnapshot;
4213
3782
  /** 快照选项 */
4214
3783
  snapshotOptions;
4215
- /** 轮次后稳定等待配置 */
4216
- roundStabilityWait;
4217
3784
  /** 工具注册表实例 — 每个 WebAgent 拥有独立的工具集 */
4218
3785
  registry = new ToolRegistry();
4219
3786
  /** 事件回调 — 绑定后可实时获取 Agent 进度,用于 UI 展示 */
@@ -4230,7 +3797,6 @@ var WebAgent = class WebAgent {
4230
3797
  this.memory = options.memory ?? false;
4231
3798
  this.autoSnapshot = options.autoSnapshot ?? true;
4232
3799
  this.snapshotOptions = options.snapshotOptions ?? {};
4233
- this.roundStabilityWait = options.roundStabilityWait;
4234
3800
  if (typeof options.systemPrompt === "string") this.setSystemPrompt(options.systemPrompt);
4235
3801
  else if (options.systemPrompt && typeof options.systemPrompt === "object") this.setSystemPrompts(options.systemPrompt);
4236
3802
  }
@@ -4423,7 +3989,6 @@ var WebAgent = class WebAgent {
4423
3989
  history: this.memory ? this.history : void 0,
4424
3990
  dryRun: this.dryRun,
4425
3991
  maxRounds: this.maxRounds,
4426
- roundStabilityWait: this.roundStabilityWait,
4427
3992
  callbacks: wrappedCallbacks
4428
3993
  });
4429
3994
  if (this.memory) this.history = result.messages;