agentpage 0.0.15 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -73,6 +73,34 @@ function hasToolError(result) {
73
73
  *
74
74
  * 默认关闭 viewportOnly,优先完整性。
75
75
  * viewportOnly defaults to false to prioritize completeness.
76
+ *
77
+ * 步骤(中)/ Steps (EN):
78
+ * 1) 合并调用方 options 与默认值(深度/裁剪/剪枝/节点上限等)。
79
+ * 2) 分发 `page_info.snapshot` 获取当前 DOM 文本快照。
80
+ * 3) 使用 `toContentString` 归一化输出,避免 provider 差异导致结构不一致。
81
+ * 4) 返回稳定字符串给 loop,供后续注入消息与统计。
82
+ *
83
+ * 默认参数意图(中)/ Default parameter rationale (EN):
84
+ * - `maxDepth=8`: 保留足够层级,减少关键控件被截断。
85
+ * - `viewportOnly=false`: 优先完整性,避免误判“元素不存在”。
86
+ * - `pruneLayout=true`: 抑制纯布局噪声,降低 token 压力。
87
+ * - `maxNodes=500` / `maxChildren=30`: 控制体积上限,兼顾可读性。
88
+ * - `maxTextLength=40`: 防止长文本淹没结构信息。
89
+ *
90
+ * 压缩/剪枝是怎么做的(中)/ How compression & pruning works in practice (EN):
91
+ * - `viewportOnly=true` 时:仅保留与视口相交元素(根层容器保留),完全视口外元素跳过。
92
+ * - `pruneLayout=true` 时:无 id/无语义/无交互/无直接文本的布局容器会被“折叠”,
93
+ * 子节点直接提升输出,减少无意义层级。
94
+ * - `maxNodes`:全局节点预算,超限后停止继续遍历并追加 truncation 提示。
95
+ * - `maxChildren`:每个父节点只保留前 N 个子元素,其余用 `... (n children omitted)` 汇总。
96
+ * - `maxTextLength`:节点文本按长度截断,避免长段文案占满上下文。
97
+ * - 交互优先排序:优先输出按钮/输入框/链接等交互元素,再输出普通元素。
98
+ * - 属性压缩:仅保留关键属性(如 id、关键 class、交互属性、布尔状态、val),减少冗余 token。
99
+ *
100
+ * 输入/输出(中)/ I/O contract (EN):
101
+ * - In: `ToolRegistry` + 可选快照参数
102
+ * - Out: 归一化后的快照字符串(始终 string)
103
+ * - Side effects: 无本地状态写入;仅依赖工具调用结果
76
104
  */
77
105
  async function readPageSnapshot(registry, options) {
78
106
  return toContentString((await registry.dispatch("page_info", {
@@ -85,7 +113,14 @@ async function readPageSnapshot(registry, options) {
85
113
  maxTextLength: options?.maxTextLength ?? 40
86
114
  })).content);
87
115
  }
88
/**
 * Wrap a snapshot with boundary markers.
 *
 * Places `SNAPSHOT_START` on the line before the snapshot and `SNAPSHOT_END`
 * on the line after it. The function is a pure string transformation: it
 * reads only its argument and the two marker constants.
 *
 * NOTE(review): the markers are presumably what later passes use to locate,
 * de-duplicate and strip old snapshots — confirm against the callers.
 *
 * @param {string} snapshot - Snapshot text to wrap.
 * @returns {string} `SNAPSHOT_START`, the snapshot and `SNAPSHOT_END`, newline-separated.
 */
function wrapSnapshot(snapshot) {
  return "".concat(SNAPSHOT_START, "\n", snapshot, "\n", SNAPSHOT_END);
}
@@ -101,6 +136,11 @@ function containsSnapshot(text) {
101
136
  }
102
137
  /**
103
138
  * 剥离旧快照(中)/ Strip outdated snapshot blocks from system prompt (EN).
139
+ *
140
+ * 说明(中)/ Notes (EN):
141
+ * - 当 prompt 中已有历史快照时,将其替换为过期占位文本。
142
+ * - 让每轮真正生效的只有“最新注入快照”,减少冲突上下文。
143
+ * - 这是 prompt 级清理;不会触碰 tool trace 中的原始结果对象。
104
144
  */
105
145
  function stripSnapshotFromPrompt(prompt) {
106
146
  if (!containsSnapshot(prompt)) return prompt;
@@ -162,7 +202,7 @@ function formatToolResultBrief(result) {
162
202
  * - `previousRoundTasks`:上一轮已执行的任务数组,避免重复计划。
163
203
  * - 消息中要求模型输出 `REMAINING: ...` 或 `REMAINING: DONE`,供下一轮继续消费。
164
204
  */
165
- function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks) {
205
+ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint) {
166
206
  const messages = history ? [...history] : [];
167
207
  const allowAgentUiInteraction = isExplicitAgentUiRequest(userMessage);
168
208
  const activeInstruction = remainingInstruction && remainingInstruction.trim() ? remainingInstruction.trim() : userMessage;
@@ -176,6 +216,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
176
216
  ];
177
217
  if (currentUrl) parts.push("", `URL: ${currentUrl}`);
178
218
  if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
219
+ if (protocolViolationHint) parts.push("", protocolViolationHint);
179
220
  messages.push({
180
221
  role: "user",
181
222
  content: parts.join("\n")
@@ -215,6 +256,8 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
215
256
  if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
216
257
  else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
217
258
  if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
259
+ if (previousRoundPlannedTasks && previousRoundPlannedTasks.length > 0) contextParts.push("", "Previous round model planned task array (before execution):", ...previousRoundPlannedTasks.map((task, index) => `${index + 1}. ${task}`));
260
+ if (previousRoundModelOutput) contextParts.push("", "Previous round model output (normalized, for task reduction input):", previousRoundModelOutput);
218
261
  contextParts.push("", "After this round, include one plain text line:", "REMAINING: <new remaining instruction after this-round actions>", "or REMAINING: DONE");
219
262
  const lastEntry = trace[trace.length - 1];
220
263
  if (hasToolError(lastEntry.result)) {
@@ -222,6 +265,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
222
265
  if (stripped && stripped.length < 300) contextParts.push("", "Last error: " + stripped);
223
266
  }
224
267
  if (currentUrl) contextParts.push("", `URL: ${currentUrl}`);
268
+ if (protocolViolationHint) contextParts.push("", protocolViolationHint);
225
269
  if (latestSnapshot) contextParts.push("", "## Latest DOM snapshot", "Use hash IDs from this snapshot. Do NOT call page_info — this is already the latest.", wrapSnapshot(latestSnapshot));
226
270
  messages.push({
227
271
  role: "user",
@@ -385,9 +429,12 @@ async function executeAgentLoop(params) {
385
429
  let outputTokens = 0;
386
430
  let remainingInstruction = message.trim();
387
431
  let previousRoundTasks = [];
432
+ let previousRoundPlannedTasks = [];
433
+ let previousRoundModelOutput = "";
388
434
  let lastPlannedBatchKey = "";
389
435
  let consecutiveSamePlannedBatch = 0;
390
436
  let lastRoundHadError = false;
437
+ let protocolViolationHint;
391
438
  let recoveryCount = 0;
392
439
  let redundantInterceptCount = 0;
393
440
  let pendingNotFoundRetry;
@@ -449,6 +496,20 @@ async function executeAgentLoop(params) {
449
496
  return `${tc.name}:${inputText}`;
450
497
  });
451
498
  /**
499
+ * 规范化模型文本输出(中)/ Normalize model text for next-round input (EN).
500
+ *
501
+ * 优先保留 REMAINING 行;否则截断首段文本,避免长篇规划污染下一轮输入。
502
+ * Prefer REMAINING line; otherwise keep a short excerpt to avoid long planning spillover.
503
+ */
504
+ const normalizeModelOutput = (text) => {
505
+ if (!text) return "";
506
+ const trimmed = text.trim();
507
+ if (!trimmed) return "";
508
+ const remainingMatch = trimmed.match(/REMAINING\s*:\s*([\s\S]*)$/i);
509
+ if (remainingMatch) return `REMAINING: ${remainingMatch[1].trim()}`;
510
+ return (trimmed.split(/\n\s*\n/)[0]?.trim() ?? trimmed).slice(0, 220);
511
+ };
512
+ /**
452
513
  * 判定动作是否会触发 DOM 结构变化(中)/ Whether action may cause DOM-shape change (EN).
453
514
  *
454
515
  * 触发后应强制断轮,等待下一轮新快照继续。
@@ -490,8 +551,8 @@ async function executeAgentLoop(params) {
490
551
  /**
491
552
  * 推进下一轮描述(中)/ Derive next-round instruction from model text (EN).
492
553
  *
493
- * 优先 REMAINING 协议;若未提供,则把本轮 content 视为“更新后的任务描述”。
494
- * Priority: REMAINING protocol first; otherwise treat current content as updated instruction.
554
+ * 优先 REMAINING 协议;若未提供,则保持当前 remaining 不变。
555
+ * Priority: REMAINING protocol first; otherwise keep current remaining instruction unchanged.
495
556
  */
496
557
  const deriveNextInstruction = (text, currentInstruction) => {
497
558
  const parsed = parseRemainingInstruction(text);
@@ -504,12 +565,26 @@ async function executeAgentLoop(params) {
504
565
  hasRemainingProtocol: false
505
566
  };
506
567
  };
568
/**
 * Heuristic reduction of the remaining instruction for linear task lists.
 *
 * The instruction is linearized (whitespace collapsed, arrows and listed
 * punctuation turned into the connector word), split on a fixed set of
 * Chinese sequencing connectors, and the first `executedCount` segments are
 * dropped. The surviving segments are re-joined with " -> ".
 *
 * The instruction is returned unchanged when it is blank, when
 * `executedCount` is not positive, or when it cannot be split into more than
 * one segment. An empty string is returned when every segment was consumed.
 *
 * @param {string} currentInstruction - Instruction still pending.
 * @param {number} executedCount - Number of actions executed this round.
 * @returns {string} Reduced instruction, the original instruction, or "".
 */
const reduceRemainingHeuristically = (currentInstruction, executedCount) => {
  const nothingToReduce = !currentInstruction.trim() || executedCount <= 0;
  if (nothingToReduce) return currentInstruction;

  // Normalize every supported separator into the connector word first.
  const linearized = currentInstruction
    .replace(/\s+/g, " ")
    .replace(/(->|=>|→)/g, " 然后 ")
    .replace(/[,,。;;]/g, " 然后 ");

  const steps = [];
  for (const piece of linearized.split(/\s*(?:然后|再|并且|并|接着|随后|之后)\s*/g)) {
    const step = piece.trim();
    if (step) steps.push(step);
  }
  // A single segment cannot be reduced step-by-step.
  if (steps.length <= 1) return currentInstruction;

  const pending = steps.slice(Math.min(executedCount, steps.length));
  return pending.length === 0 ? "" : pending.join(" -> ");
};
507
582
  for (let round = 0; round < maxRounds; round++) {
508
583
  callbacks?.onRound?.(round);
509
584
  usedRounds = round + 1;
510
585
  if (!pageContext.latestSnapshot) await refreshSnapshot();
511
586
  const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
512
- const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks);
587
+ const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint);
513
588
  if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
514
589
  role: "user",
515
590
  content: [
@@ -528,8 +603,7 @@ async function executeAgentLoop(params) {
528
603
  });
529
604
  inputTokens += response.usage?.inputTokens ?? 0;
530
605
  outputTokens += response.usage?.outputTokens ?? 0;
531
- const nextInstructionState = deriveNextInstruction(response.text, remainingInstruction);
532
- remainingInstruction = nextInstructionState.nextInstruction;
606
+ const parsedInstructionState = deriveNextInstruction(response.text, remainingInstruction);
533
607
  if (!response.toolCalls || response.toolCalls.length === 0) {
534
608
  if (pendingNotFoundRetry) {
535
609
  const unresolvedHint = response.text?.toLowerCase() ?? "";
@@ -545,10 +619,29 @@ async function executeAgentLoop(params) {
545
619
  }
546
620
  pendingNotFoundRetry = void 0;
547
621
  }
622
+ if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
623
+ if (remainingInstruction.trim().length > 0 && round < maxRounds - 1) {
624
+ protocolViolationHint = [
625
+ "Protocol violation in previous round:",
626
+ "- Remaining task is not DONE, but no tool calls were returned.",
627
+ "This round MUST do one of:",
628
+ "1) Return actionable tool calls for visible targets; or",
629
+ "2) If truly complete, return a short summary and EXACTLY `REMAINING: DONE`.",
630
+ "Do NOT output planning/explaining text."
631
+ ].join("\n");
632
+ lastRoundHadError = true;
633
+ await refreshSnapshot();
634
+ continue;
635
+ }
548
636
  finalReply = response.text ?? "";
549
637
  if (finalReply) callbacks?.onText?.(finalReply);
550
638
  break;
551
639
  }
640
+ protocolViolationHint = void 0;
641
+ const plannedTasksCurrentRound = buildTaskArray(response.toolCalls.map((tc) => ({
642
+ name: tc.name,
643
+ input: tc.input
644
+ })));
552
645
  const plannedBatchKey = JSON.stringify(response.toolCalls.map((tc) => ({
553
646
  name: tc.name,
554
647
  input: tc.input
@@ -617,9 +710,16 @@ async function executeAgentLoop(params) {
617
710
  tasks: roundMissingTasks
618
711
  };
619
712
  else pendingNotFoundRetry = void 0;
620
- if (!nextInstructionState.hasRemainingProtocol) roundHasError = true;
713
+ if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
714
+ else {
715
+ const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, executedTaskCalls.length);
716
+ if (nextByHeuristic !== remainingInstruction) remainingInstruction = nextByHeuristic;
717
+ else roundHasError = true;
718
+ }
719
+ previousRoundModelOutput = parsedInstructionState.hasRemainingProtocol ? normalizeModelOutput(response.text) : `REMAINING: ${remainingInstruction || "DONE"}`;
621
720
  lastRoundHadError = roundHasError;
622
721
  previousRoundTasks = buildTaskArray(executedTaskCalls);
722
+ previousRoundPlannedTasks = plannedTasksCurrentRound;
623
723
  const idleResult = detectIdleLoop(executedTaskCalls.map((tc) => tc.name), consecutiveReadOnlyRounds);
624
724
  if (idleResult === -1) {
625
725
  finalReply = response.text || "任务已完成。";
@@ -1307,10 +1407,13 @@ function buildSystemPrompt(params = {}) {
1307
1407
  * 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
1308
1408
  * 运行环境:浏览器 Content Script。
1309
1409
  *
1310
- * 支持 12 种动作:
1410
+ * 支持 15 种动作:
1311
1411
  * click — 点击元素
1312
1412
  * fill — 填写可编辑控件(input/textarea/select/contenteditable)
1313
1413
  * select_option — 选择下拉框选项(value/label)
1414
+ * clear — 清空输入控件
1415
+ * check — 勾选 checkbox/radio
1416
+ * uncheck — 取消勾选 checkbox
1314
1417
  * type — 逐字符模拟键入
1315
1418
  * focus — 聚焦元素
1316
1419
  * hover — 鼠标悬停(触发 mouseenter/mouseover)
@@ -1388,16 +1491,62 @@ function resolveWaitMs(params) {
1388
1491
  * 模拟真实用户输入:触发 input、change 事件,兼容 React/Vue 等框架。
1389
1492
  */
1390
1493
  function dispatchInputEvents(el) {
1391
- el.dispatchEvent(new Event("input", {
1392
- bubbles: true,
1393
- cancelable: true
1394
- }));
1494
+ try {
1495
+ el.dispatchEvent(new InputEvent("input", {
1496
+ bubbles: true,
1497
+ cancelable: true,
1498
+ inputType: "insertText",
1499
+ data: null
1500
+ }));
1501
+ } catch {
1502
+ el.dispatchEvent(new Event("input", {
1503
+ bubbles: true,
1504
+ cancelable: true
1505
+ }));
1506
+ }
1395
1507
  el.dispatchEvent(new Event("change", {
1396
1508
  bubbles: true,
1397
1509
  cancelable: true
1398
1510
  }));
1399
1511
  }
1400
1512
  /**
1513
+ * 使用原生 setter 写入表单值,提升对受控组件(React/Vue 等)的兼容性。
1514
+ */
1515
+ function setNativeEditableValue(el, value) {
1516
+ const proto = el instanceof HTMLInputElement ? HTMLInputElement.prototype : el instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : HTMLSelectElement.prototype;
1517
+ const descriptor = Object.getOwnPropertyDescriptor(proto, "value");
1518
+ if (descriptor?.set) {
1519
+ descriptor.set.call(el, value);
1520
+ return;
1521
+ }
1522
+ el.value = value;
1523
+ }
1524
/**
 * Read the current value of an editable element.
 *
 * @param {{ value?: unknown }} el - Element whose `value` property is read.
 * @returns {unknown} `el.value`, or "" when that property is null or undefined.
 */
function getEditableValue(el) {
  const current = el.value;
  if (current === null || current === void 0) return "";
  return current;
}
1530
/**
 * Map a `KeyboardEvent.key` value to a matching `KeyboardEvent.code` value.
 *
 * - Named keys (Enter, Escape/Esc, Tab, Space/" ", Backspace, Delete and the
 *   four arrows) are translated through a fixed table.
 * - A single ASCII letter maps to `Key<UPPER>` and a single ASCII digit to
 *   `Digit<n>`, which are the codes for those keys on a standard US layout.
 * - Anything else is returned unchanged.
 *
 * The table is a `Map`, so names inherited from `Object.prototype` (such as
 * "constructor" or "__proto__") are never resolved to non-string values.
 *
 * @param {string} key - Key name as passed to the press action.
 * @returns {string} Best-effort `code` for the key.
 */
function resolveKeyboardCode(key) {
  const namedCodes = new Map([
    ["Enter", "Enter"],
    ["Escape", "Escape"],
    ["Esc", "Escape"],
    ["Tab", "Tab"],
    ["Space", "Space"],
    [" ", "Space"],
    ["Backspace", "Backspace"],
    ["Delete", "Delete"],
    ["ArrowUp", "ArrowUp"],
    ["ArrowDown", "ArrowDown"],
    ["ArrowLeft", "ArrowLeft"],
    ["ArrowRight", "ArrowRight"]
  ]);
  const named = namedCodes.get(key);
  if (named !== void 0) return named;
  if (typeof key === "string") {
    if (/^[a-z]$/i.test(key)) return `Key${key.toUpperCase()}`;
    if (/^[0-9]$/.test(key)) return `Digit${key}`;
  }
  return key;
}
1549
+ /**
1401
1550
  * 生成元素的可读描述,用于在操作结果中展示实际命中的 DOM 节点。
1402
1551
  * 格式:<tag#id.class> "文本" [attr=val, ...]
1403
1552
  */
@@ -1405,7 +1554,7 @@ function describeElement(el) {
1405
1554
  const tag = el.tagName.toLowerCase();
1406
1555
  const id = el.id ? `#${el.id}` : "";
1407
1556
  const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
1408
- const text = el.textContent?.trim().slice(0, 40) ?? "";
1557
+ const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
1409
1558
  const textHint = text ? ` "${text}"` : "";
1410
1559
  const hints = [];
1411
1560
  for (const attr of [
@@ -1418,30 +1567,135 @@ function describeElement(el) {
1418
1567
  const val = el.getAttribute(attr);
1419
1568
  if (val) hints.push(`${attr}=${val}`);
1420
1569
  }
1570
+ if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
1421
1571
  return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
1422
1572
  }
1573
/**
 * Report whether an element is currently rendered with a non-empty box.
 *
 * An element counts as visible only when it is an `HTMLElement` or
 * `SVGElement`, is connected to the document, its computed style is not
 * `display: none`, `visibility: hidden` or `opacity: 0`, and its bounding
 * client rect has positive width and height.
 *
 * @param {unknown} el - Candidate element.
 * @returns {boolean} True when all of the checks above pass.
 */
function isElementVisible(el) {
  const isRenderable = el instanceof HTMLElement || el instanceof SVGElement;
  if (!isRenderable || !el.isConnected) return false;

  const { display, visibility, opacity } = window.getComputedStyle(el);
  if (display === "none" || visibility === "hidden" || opacity === "0") return false;

  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
}
1582
/**
 * Report whether an element should be treated as disabled.
 *
 * Non-`HTMLElement` values are never disabled. Otherwise the element is
 * disabled when it carries a `disabled` attribute, when `aria-disabled` is
 * exactly "true", or when it exposes a boolean `disabled` property that is
 * true.
 *
 * @param {unknown} el - Candidate element.
 * @returns {boolean} True when the element is considered disabled.
 */
function isElementDisabled(el) {
  if (!(el instanceof HTMLElement)) return false;

  const flaggedByAttribute = el.hasAttribute("disabled") || el.getAttribute("aria-disabled") === "true";
  if (flaggedByAttribute) return true;

  const hasBooleanFlag = "disabled" in el && typeof el.disabled === "boolean";
  return hasBooleanFlag ? Boolean(el.disabled) : false;
}
1589
/**
 * Report whether an element accepts value edits.
 *
 * - `<textarea>`: editable unless read-only.
 * - `<input>`: editable unless read-only or its type is checkbox, radio,
 *   file, button, submit or reset.
 * - `<select>`: always editable.
 * - Any other `HTMLElement`: the value of its `isContentEditable` property.
 *
 * @param {unknown} el - Candidate element.
 * @returns {boolean} Whether the element can be edited.
 */
function isEditableElement(el) {
  if (el instanceof HTMLTextAreaElement) return !el.readOnly;
  if (el instanceof HTMLInputElement) {
    // Input types that carry no free-form value.
    const nonTextTypes = [
      "checkbox",
      "radio",
      "file",
      "button",
      "submit",
      "reset"
    ];
    return !nonTextTypes.includes(el.type) && !el.readOnly;
  }
  if (el instanceof HTMLSelectElement) return true;
  return el instanceof HTMLElement && el.isContentEditable;
}
1602
/**
 * Pre-flight check run before a DOM action is executed.
 *
 * The checks run in this order and the first failure is returned:
 * 1. The element must still be connected (`ELEMENT_DETACHED`).
 * 2. For every action except get_text / get_attr the element must be visible
 *    (`ELEMENT_NOT_VISIBLE`).
 * 3. For click, fill, type, press, select_option, clear, check and uncheck
 *    the element must not be disabled (`ELEMENT_DISABLED`).
 * 4. For fill, type and clear the element must be editable
 *    (`UNSUPPORTED_FILL_TARGET`).
 *
 * @param {Element} el - Resolved target element.
 * @param {string} action - Requested DOM action name.
 * @param {string} selector - Selector used to find the element (for messages).
 * @returns {{ content: string, details: object } | null} An error result, or
 *   null when the action may proceed.
 */
function ensureActionable(el, action, selector) {
  // All failures share the same result shape; only code and message differ.
  const reject = (code, content) => ({
    content,
    details: {
      error: true,
      code,
      action,
      selector
    }
  });

  if (!el.isConnected) return reject("ELEMENT_DETACHED", `"${selector}" 元素已脱离文档,无法执行 ${action}`);

  const readOnlyActions = ["get_text", "get_attr"];
  if (!readOnlyActions.includes(action) && !isElementVisible(el)) {
    return reject("ELEMENT_NOT_VISIBLE", `"${selector}" 元素不可见,无法执行 ${action}`);
  }

  const interactiveActions = [
    "click",
    "fill",
    "type",
    "press",
    "select_option",
    "clear",
    "check",
    "uncheck"
  ];
  if (interactiveActions.includes(action) && isElementDisabled(el)) {
    return reject("ELEMENT_DISABLED", `"${selector}" 元素已禁用,无法执行 ${action}`);
  }

  const editingActions = ["fill", "type", "clear"];
  if (editingActions.includes(action) && !isEditableElement(el)) {
    return reject("UNSUPPORTED_FILL_TARGET", `"${selector}" 不是可编辑元素,无法执行 ${action}`);
  }

  return null;
}
1654
/**
 * Report whether a node can serve as a visible dropdown option.
 *
 * The node must be an `HTMLElement`, pass `isElementVisible`, and have
 * non-empty text content after trimming.
 *
 * @param {unknown} el - Candidate node.
 * @returns {boolean} True when the node is a visible element with text.
 */
function isOptionCandidateVisible(el) {
  if (!(el instanceof HTMLElement) || !isElementVisible(el)) return false;
  const label = el.textContent?.trim() ?? "";
  return label.length > 0;
}
1659
/**
 * Find a visible option-like node whose text matches `text`.
 *
 * Candidates are collected with a fixed selector list (ARIA options, a few
 * component-library option classes, `[data-option]`, `li` and `option`).
 * Matching is case-insensitive on trimmed text. The first visible node whose
 * text equals the target wins; if none exists, the first visible node whose
 * text contains the target is returned.
 *
 * The list is walked once, so each candidate's visibility (computed style and
 * layout) is evaluated at most one time per call.
 *
 * @param {string} text - Option text to look for.
 * @returns {Element | null} Matching node, or null when the text is blank or
 *   nothing matches.
 */
function findVisibleOptionByText(text) {
  const target = text.trim().toLowerCase();
  if (!target) return null;

  const nodes = document.querySelectorAll("[role=\"option\"], .bk-select-option, .bk-option, [data-option], li, option");
  let firstPartialMatch = null;
  for (const node of nodes) {
    if (!isOptionCandidateVisible(node)) continue;
    const nodeText = node.textContent?.trim().toLowerCase() ?? "";
    // An exact match always beats any partial match, so return immediately.
    if (nodeText === target) return node;
    if (firstPartialMatch === null && nodeText.includes(target)) firstPartialMatch = node;
  }
  return firstPartialMatch;
}
1423
1673
  function createDomTool() {
1424
1674
  return {
1425
1675
  name: "dom",
1426
1676
  description: [
1427
1677
  "Perform DOM operations on the current page.",
1428
- "Actions: click, fill, select_option, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
1678
+ "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
1429
1679
  "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
1430
1680
  ].join(" "),
1431
1681
  schema: Type.Object({
1432
- action: Type.String({ description: "DOM action: click | fill | select_option | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
1682
+ action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
1433
1683
  selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
1434
1684
  value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
1435
1685
  key: Type.Optional(Type.String({ description: "Key name for press action (e.g. Enter, Escape, Tab, ArrowDown, ArrowUp, Backspace, Delete, Space)" })),
1686
+ label: Type.Optional(Type.String({ description: "Label text for select_option action (fallback when value is not provided)" })),
1687
+ index: Type.Optional(Type.Number({ description: "0-based option index for select_option action" })),
1436
1688
  attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr actions" })),
1437
1689
  className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
1438
1690
  waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
1439
- waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." }))
1691
+ waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." })),
1692
+ force: Type.Optional(Type.Boolean({ description: "Skip actionability checks for interaction actions (default false)." }))
1440
1693
  }),
1441
1694
  execute: async (params) => {
1442
1695
  const action = params.action;
1443
1696
  const selector = params.selector;
1444
1697
  const waitMs = resolveWaitMs(params);
1698
+ const force = params.force === true;
1445
1699
  if (!selector) return { content: "缺少 selector 参数" };
1446
1700
  let el;
1447
1701
  if (waitMs > 0) {
@@ -1481,6 +1735,10 @@ function createDomTool() {
1481
1735
  el = elOrError;
1482
1736
  }
1483
1737
  try {
1738
+ if (!force) {
1739
+ const checkResult = ensureActionable(el, action, selector);
1740
+ if (checkResult) return checkResult;
1741
+ }
1484
1742
  switch (action) {
1485
1743
  case "click":
1486
1744
  if (el instanceof HTMLOptionElement) {
@@ -1494,6 +1752,22 @@ function createDomTool() {
1494
1752
  }
1495
1753
  if (el instanceof HTMLElement) {
1496
1754
  el.focus();
1755
+ el.dispatchEvent(new PointerEvent("pointerdown", {
1756
+ bubbles: true,
1757
+ cancelable: true
1758
+ }));
1759
+ el.dispatchEvent(new MouseEvent("mousedown", {
1760
+ bubbles: true,
1761
+ cancelable: true
1762
+ }));
1763
+ el.dispatchEvent(new PointerEvent("pointerup", {
1764
+ bubbles: true,
1765
+ cancelable: true
1766
+ }));
1767
+ el.dispatchEvent(new MouseEvent("mouseup", {
1768
+ bubbles: true,
1769
+ cancelable: true
1770
+ }));
1497
1771
  el.click();
1498
1772
  } else el.dispatchEvent(new MouseEvent("click", { bubbles: true }));
1499
1773
  return { content: `已点击 ${describeElement(el)}` };
@@ -1521,22 +1795,58 @@ function createDomTool() {
1521
1795
  if (el instanceof HTMLElement) el.focus();
1522
1796
  const eventInit = {
1523
1797
  key,
1524
- code: key,
1798
+ code: resolveKeyboardCode(key),
1525
1799
  bubbles: true,
1526
1800
  cancelable: true
1527
1801
  };
1528
- el.dispatchEvent(new KeyboardEvent("keydown", eventInit));
1802
+ const keydownAllowed = el.dispatchEvent(new KeyboardEvent("keydown", eventInit));
1529
1803
  el.dispatchEvent(new KeyboardEvent("keypress", eventInit));
1530
1804
  el.dispatchEvent(new KeyboardEvent("keyup", eventInit));
1805
+ if (keydownAllowed && key === "Enter") {
1806
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) (el.form ?? el.closest("form"))?.dispatchEvent(new Event("submit", {
1807
+ bubbles: true,
1808
+ cancelable: true
1809
+ }));
1810
+ }
1531
1811
  return { content: `已在 ${describeElement(el)} 上按下 ${key}` };
1532
1812
  }
1533
1813
  case "fill": {
1534
1814
  const value = params.value;
1535
1815
  if (value === void 0) return { content: "缺少 value 参数" };
1536
1816
  if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
1817
+ if (el instanceof HTMLInputElement) {
1818
+ if (new Set([
1819
+ "checkbox",
1820
+ "radio",
1821
+ "file",
1822
+ "button",
1823
+ "submit",
1824
+ "reset"
1825
+ ]).has(el.type)) return {
1826
+ content: `"${selector}" 为 input[type=${el.type}],不支持 fill;请使用 click/press/select_option 等动作。`,
1827
+ details: {
1828
+ error: true,
1829
+ code: "UNSUPPORTED_FILL_TARGET",
1830
+ action,
1831
+ selector
1832
+ }
1833
+ };
1834
+ }
1537
1835
  el.focus();
1538
- el.value = value;
1836
+ setNativeEditableValue(el, value);
1539
1837
  dispatchInputEvents(el);
1838
+ const actualValue = getEditableValue(el);
1839
+ if (actualValue !== value) return {
1840
+ content: `"${selector}" 填写后值不一致:期望 "${value}",实际 "${actualValue}"`,
1841
+ details: {
1842
+ error: true,
1843
+ code: "FILL_NOT_APPLIED",
1844
+ action,
1845
+ selector,
1846
+ expected: value,
1847
+ actual: actualValue
1848
+ }
1849
+ };
1540
1850
  } else if (el instanceof HTMLSelectElement) {
1541
1851
  el.focus();
1542
1852
  let matched = false;
@@ -1555,6 +1865,18 @@ function createDomTool() {
1555
1865
  }
1556
1866
  if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
1557
1867
  dispatchInputEvents(el);
1868
+ const actualValue = getEditableValue(el);
1869
+ if (actualValue !== el.value) return {
1870
+ content: `"${selector}" 下拉框状态异常,未确认写入`,
1871
+ details: {
1872
+ error: true,
1873
+ code: "FILL_NOT_APPLIED",
1874
+ action,
1875
+ selector,
1876
+ expected: value,
1877
+ actual: actualValue
1878
+ }
1879
+ };
1558
1880
  } else if (el instanceof HTMLElement && el.isContentEditable) {
1559
1881
  el.focus();
1560
1882
  el.textContent = value;
@@ -1564,27 +1886,83 @@ function createDomTool() {
1564
1886
  }
1565
1887
  case "select_option": {
1566
1888
  const value = params.value;
1567
- if (value === void 0) return { content: "缺少 value 参数" };
1568
- if (!(el instanceof HTMLSelectElement)) return { content: `"${selector}" 不是下拉框元素` };
1889
+ const label = params.label;
1890
+ const index = typeof params.index === "number" ? Math.floor(params.index) : void 0;
1891
+ if (value === void 0 && label === void 0 && index === void 0) return { content: "缺少可选参数:value 或 label 或 index" };
1892
+ if (!(el instanceof HTMLSelectElement)) {
1893
+ if (!(el instanceof HTMLElement)) return { content: `"${selector}" 不是下拉框元素` };
1894
+ el.focus();
1895
+ el.click();
1896
+ const wanted = (label ?? value ?? "").trim();
1897
+ if (!wanted) return { content: `"${selector}" 为自定义下拉时,需提供 value 或 label` };
1898
+ const option = findVisibleOptionByText(wanted);
1899
+ if (!option) return {
1900
+ content: `未找到与 "${wanted}" 匹配的可见下拉选项(自定义下拉)`,
1901
+ details: {
1902
+ error: true,
1903
+ code: "OPTION_NOT_FOUND",
1904
+ action,
1905
+ selector,
1906
+ wanted
1907
+ }
1908
+ };
1909
+ option.click();
1910
+ return { content: `已在自定义下拉中选择 "${wanted}"` };
1911
+ }
1569
1912
  el.focus();
1570
- let matched = false;
1571
- for (const option of Array.from(el.options)) if (option.value === value) {
1572
- el.value = option.value;
1573
- matched = true;
1574
- break;
1913
+ const options = Array.from(el.options);
1914
+ let selectedOption;
1915
+ if (value !== void 0) selectedOption = options.find((option) => option.value === value);
1916
+ if (!selectedOption && label !== void 0) {
1917
+ const normalizedLabel = label.trim().toLowerCase();
1918
+ selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedLabel);
1575
1919
  }
1576
- if (!matched) {
1577
- const normalized = value.trim().toLowerCase();
1578
- for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
1579
- el.value = option.value;
1580
- matched = true;
1581
- break;
1582
- }
1920
+ if (!selectedOption && value !== void 0) {
1921
+ const normalizedValueAsLabel = value.trim().toLowerCase();
1922
+ selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedValueAsLabel);
1923
+ }
1924
+ if (!selectedOption && index !== void 0) {
1925
+ if (index < 0 || index >= options.length) return { content: `"${selector}" 下拉框不存在 index=${index} 的选项` };
1926
+ selectedOption = options[index];
1583
1927
  }
1584
- if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
1928
+ if (!selectedOption) return { content: `"${selector}" 下拉框中不存在选项 "${value ?? label ?? `index=${index}`}"` };
1929
+ if (selectedOption.disabled) return { content: `"${selector}" 目标选项已禁用:${selectedOption.value}` };
1930
+ if (!el.multiple) for (const option of options) option.selected = false;
1931
+ selectedOption.selected = true;
1932
+ el.value = selectedOption.value;
1585
1933
  dispatchInputEvents(el);
1586
- return { content: `已选择 ${describeElement(el)}: "${el.value}"` };
1934
+ return { content: `已选择 ${describeElement(el)}: value="${selectedOption.value}", label="${selectedOption.text.trim()}"` };
1587
1935
  }
1936
+ case "clear":
1937
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement) {
1938
+ el.focus();
1939
+ setNativeEditableValue(el, "");
1940
+ dispatchInputEvents(el);
1941
+ return { content: `已清空 ${describeElement(el)}` };
1942
+ }
1943
+ if (el instanceof HTMLElement && el.isContentEditable) {
1944
+ el.focus();
1945
+ el.textContent = "";
1946
+ el.dispatchEvent(new Event("input", { bubbles: true }));
1947
+ return { content: `已清空 ${describeElement(el)}` };
1948
+ }
1949
+ return { content: `"${selector}" 不是可清空元素` };
1950
+ case "check":
1951
+ if (!(el instanceof HTMLInputElement) || el.type !== "checkbox" && el.type !== "radio") return { content: `"${selector}" 不是 checkbox/radio` };
1952
+ el.focus();
1953
+ if (!el.checked) {
1954
+ el.checked = true;
1955
+ dispatchInputEvents(el);
1956
+ }
1957
+ return { content: `已勾选 ${describeElement(el)}` };
1958
+ case "uncheck":
1959
+ if (!(el instanceof HTMLInputElement) || el.type !== "checkbox") return { content: `"${selector}" 不是 checkbox` };
1960
+ el.focus();
1961
+ if (el.checked) {
1962
+ el.checked = false;
1963
+ dispatchInputEvents(el);
1964
+ }
1965
+ return { content: `已取消勾选 ${describeElement(el)}` };
1588
1966
  case "type": {
1589
1967
  const value = params.value;
1590
1968
  if (value === void 0) return { content: "缺少 value 参数" };
@@ -1837,6 +2215,12 @@ function generateSnapshot(root = document.body, options = {}) {
1837
2215
  if (val) attrs.push(`${attr}="${val}"`);
1838
2216
  }
1839
2217
  for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
2218
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement || el instanceof HTMLButtonElement) {
2219
+ if (el.disabled && !attrs.includes("disabled")) attrs.push("disabled");
2220
+ }
2221
+ if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.readOnly) {
2222
+ if (!attrs.includes("readonly")) attrs.push("readonly");
2223
+ }
1840
2224
  if (el.hasAttribute("onclick")) attrs.push("onclick");
1841
2225
  const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
1842
2226
  if (testId) attrs.push(`data-testid="${testId.slice(0, 25)}"`);
@@ -1844,6 +2228,13 @@ function generateSnapshot(root = document.body, options = {}) {
1844
2228
  const currentVal = el.value.slice(0, 40);
1845
2229
  if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
1846
2230
  }
2231
+ if (el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio") && el.checked) {
2232
+ if (!attrs.includes("checked")) attrs.push("checked");
2233
+ }
2234
+ if (el instanceof HTMLSelectElement && el.value) attrs.push(`val="${el.value.slice(0, 40)}"`);
2235
+ if (el instanceof HTMLOptionElement && el.selected) {
2236
+ if (!attrs.includes("selected")) attrs.push("selected");
2237
+ }
1847
2238
  let directText = "";
1848
2239
  for (let i = 0; i < el.childNodes.length; i++) {
1849
2240
  const node = el.childNodes[i];
@@ -2078,72 +2469,104 @@ function createNavigateTool() {
2078
2469
  * 替代 Playwright 的 waitForSelector/waitForNavigation。
2079
2470
  * 运行环境:浏览器 Content Script。
2080
2471
  *
2081
- * 支持 3 种动作:
2472
+ * 支持 4 种动作:
2082
2473
  * wait_for_selector — 等待匹配选择器的元素出现
2083
2474
  * wait_for_hidden — 等待元素消失或隐藏
2084
2475
  * wait_for_text — 等待页面中出现指定文本
2476
+ * wait_for_stable — 等待 DOM 在一段时间内无变化
2085
2477
  */
2086
2478
  /** 默认超时时间(毫秒) */
2087
2479
  const DEFAULT_TIMEOUT = 1e4;
2088
2480
  /**
2089
- * 通过 MutationObserver 等待元素出现。
2090
- * 先检查元素是否已存在,不存在则监听 DOM 变化直到出现或超时。
2481
+ * Playwright 风格可见性判定(近似)。
2482
+ */
2483
/**
 * Approximate, Playwright-style visibility check for a single element.
 *
 * An element is reported visible only when every condition holds:
 *  - it is an HTMLElement or SVGElement that is connected to a document;
 *  - its computed `display` is not "none";
 *  - its computed `visibility` is neither "hidden" nor "collapse";
 *  - its computed `opacity` is not "0";
 *  - its bounding client rect has a non-zero width and a non-zero height.
 *
 * NOTE: Playwright's own rule treats `opacity: 0` elements as visible; this
 * helper is intentionally kept stricter, as in the previous implementation.
 *
 * @param {unknown} el - Candidate node; anything that is not an
 *   HTMLElement/SVGElement yields false.
 * @returns {boolean} True when the element passes all checks above.
 */
function isVisible(el) {
  if (!(el instanceof HTMLElement || el instanceof SVGElement)) return false;
  if (!el.isConnected) return false;

  const style = window.getComputedStyle(el);
  if (style.display === "none") return false;
  // `visibility: collapse` hides the element just like `hidden` but (outside of
  // table rows/columns and flex items) leaves its box size untouched, so the
  // bounding-rect test below would not catch it.
  if (style.visibility === "hidden" || style.visibility === "collapse") return false;
  if (style.opacity === "0") return false;

  const rect = el.getBoundingClientRect();
  return rect.width > 0 && rect.height > 0;
}
2492
+ /**
2493
+ * 读取 selector 当前状态。
2091
2494
  */
2092
- function waitForSelector(selector, timeoutMs) {
2495
/**
 * Evaluate, right now, whether the first element matching `selector`
 * satisfies the requested state.
 *
 * Supported states:
 *  - "attached": an element matches the selector;
 *  - "visible":  an element matches and `isVisible` accepts it;
 *  - "hidden":   no element matches, or the match is not visible;
 *  - "detached": no element matches.
 *
 * @param {string} selector - CSS selector handed to `document.querySelector`
 *   (an invalid selector makes that call throw).
 * @param {string} state - One of the states listed above.
 * @returns {{ matched: boolean, element?: Element }} `matched` tells whether
 *   the state holds; `element` is the first match (undefined when none).
 *   An unrecognised state yields `{ matched: false }` with no `element` key.
 */
function evaluateSelectorState(selector, state) {
  // Normalise "no match" (null) to undefined so callers see a single sentinel.
  const element = document.querySelector(selector) ?? void 0;

  if (state === "attached") {
    return { matched: Boolean(element), element };
  }
  if (state === "visible") {
    const shown = element ? Boolean(isVisible(element)) : false;
    return { matched: shown, element };
  }
  if (state === "hidden") {
    // De Morgan form of "missing or not visible".
    return { matched: !(element && isVisible(element)), element };
  }
  if (state === "detached") {
    return { matched: !element, element };
  }
  return { matched: false };
}
+ }
2517
+ /**
2518
+ * 等待 selector 达到指定状态(近似 Playwright state 语义)。
2519
+ */
2520
/**
 * Wait until `selector` reaches `state` (approximating Playwright's
 * `waitForSelector` state semantics), or reject on timeout.
 *
 * The state is evaluated once immediately; if it does not hold yet, it is
 * re-evaluated on every DOM mutation under `document.body` and additionally
 * every 80ms, because changes such as stylesheet-driven visibility flips do
 * not produce mutation records.
 *
 * @param {string} selector - CSS selector to watch.
 * @param {string} state - "attached" | "visible" | "hidden" | "detached".
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<{ element?: Element }>} Resolves with the first matching
 *   element (undefined when the satisfied state means "no element").
 *   Rejects with:
 *    - `Error("选择器语法错误: …")` (original error attached as `cause`) when the
 *      selector is syntactically invalid;
 *    - the original error when state evaluation or observer setup fails for
 *      any other reason;
 *    - a timeout `Error` when the state is not reached within `timeoutMs`.
 */
function waitForSelectorState(selector, state, timeoutMs) {
  return new Promise((resolve, reject) => {
    let finished = false;
    let timer;
    let interval;
    let observer;

    // Settle exactly once and always release every watcher first.
    const finish = (handler) => {
      if (finished) return;
      finished = true;
      clearTimeout(timer);
      clearInterval(interval);
      observer?.disconnect();
      handler();
    };

    const check = () => {
      let result;
      try {
        result = evaluateSelectorState(selector, state);
      } catch (err) {
        // Only a genuine selector SyntaxError gets the "syntax error" label;
        // any other failure is surfaced unchanged instead of being mislabelled.
        const failure = err?.name === "SyntaxError"
          ? new Error(`选择器语法错误: ${selector}`, { cause: err })
          : err;
        finish(() => reject(failure));
        return;
      }
      if (result.matched) finish(() => resolve({ element: result.element }));
    };

    // Fast path: an already-satisfied state or an invalid selector settles
    // here without allocating any timer or observer.
    check();
    if (finished) return;

    timer = setTimeout(() => {
      finish(() => reject(new Error(`等待 "${selector}" 达到状态 "${state}" 超时 (${timeoutMs}ms)`)));
    }, timeoutMs);
    interval = setInterval(check, 80);

    try {
      observer = new MutationObserver(check);
      observer.observe(document.body, {
        childList: true,
        subtree: true,
        attributes: true,
        characterData: true,
      });
    } catch (err) {
      // e.g. document.body is not available yet: do not leave the timer and
      // the polling interval running behind an already-rejected promise.
      finish(() => reject(err));
    }
  });
}
2118
2555
  /**
2119
- * 等待元素消失或变为不可见。
2556
+ * 等待页面中出现指定文本。
2120
2557
  */
2121
- function waitForHidden(selector, timeoutMs) {
2558
/**
 * Wait until the text content of `document.body` contains `text`.
 *
 * Resolves immediately when the text is already present. Otherwise the body
 * subtree is watched for child-list and character-data mutations and the
 * text is searched again after each batch of mutations.
 *
 * @param {string} text - Substring to look for in `document.body.textContent`.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the text is found; rejects with an
 *   `Error` when it has not appeared within `timeoutMs`.
 */
function waitForText(text, timeoutMs) {
  const pageHasText = () => document.body.textContent?.includes(text);

  return new Promise((resolve, reject) => {
    if (pageHasText()) {
      resolve();
      return;
    }

    let watcher;
    const deadline = setTimeout(() => {
      watcher.disconnect();
      reject(new Error(`等待文本 "${text}" 出现超时 (${timeoutMs}ms)`));
    }, timeoutMs);

    watcher = new MutationObserver(() => {
      if (!pageHasText()) return;
      clearTimeout(deadline);
      watcher.disconnect();
      resolve();
    });

    const watchOptions = { childList: true, subtree: true, characterData: true };
    watcher.observe(document.body, watchOptions);
  });
}
2164
2582
  /**
2165
- * 等待页面中出现指定文本。
2583
+ * 等待页面进入稳定状态:在 quietMs 时间窗口内没有 DOM 变化。
2166
2584
  */
2167
- function waitForText(text, timeoutMs) {
2585
/**
 * Wait until the DOM under `document.body` has been quiet (no child-list,
 * attribute or character-data mutation) for `quietMs` milliseconds.
 *
 * The condition is polled every 50ms. Success is decided by *when the quiet
 * window completed*, not by when the poll happened to run: if the window
 * ended at or before the deadline, the promise resolves even when the poll
 * that notices it fires slightly after the deadline.
 *
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds, measured
 *   from the call.
 * @param {number} quietMs - Required length of the mutation-free window, in
 *   milliseconds.
 * @returns {Promise<void>} Resolves once the DOM is stable; rejects with an
 *   `Error` when no quiet window completed within `timeoutMs`.
 */
function waitForDomStable(timeoutMs, quietMs) {
  return new Promise((resolve, reject) => {
    const startedAt = Date.now();
    const deadline = startedAt + timeoutMs;
    let lastMutationAt = startedAt;

    const observer = new MutationObserver(() => {
      lastMutationAt = Date.now();
    });
    observer.observe(document.body, {
      childList: true,
      subtree: true,
      attributes: true,
      characterData: true,
    });

    const stop = () => {
      clearInterval(tick);
      observer.disconnect();
    };

    const tick = setInterval(() => {
      const now = Date.now();
      const quietReachedAt = lastMutationAt + quietMs;

      // Check stability first so polling jitter cannot turn a quiet window
      // that finished in time into a timeout.
      if (now >= quietReachedAt && quietReachedAt <= deadline) {
        stop();
        resolve();
        return;
      }
      if (now > deadline) {
        stop();
        reject(new Error(`等待页面稳定超时 (${timeoutMs}ms)`));
      }
    }, 50);
  });
}
2191
2614
  function createWaitTool() {
@@ -2194,13 +2617,15 @@ function createWaitTool() {
2194
2617
  description: [
2195
2618
  "Wait for DOM changes on the current page.",
2196
2619
  "Actions: wait_for_selector (element appears), wait_for_hidden (element disappears),",
2197
- "wait_for_text (specific text appears in page)."
2620
+ "wait_for_text (specific text appears in page), wait_for_stable (DOM stops changing)."
2198
2621
  ].join(" "),
2199
2622
  schema: Type.Object({
2200
- action: Type.String({ description: "Wait action: wait_for_selector | wait_for_hidden | wait_for_text" }),
2623
+ action: Type.String({ description: "Wait action: wait_for_selector | wait_for_hidden | wait_for_text | wait_for_stable" }),
2201
2624
  selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
2625
+ state: Type.Optional(Type.String({ description: "Selector state for wait_for_selector: attached | visible | hidden | detached (default: attached)" })),
2202
2626
  text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
2203
- timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" }))
2627
+ timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" })),
2628
+ quietMs: Type.Optional(Type.Number({ description: "Quiet window for wait_for_stable in milliseconds (default: 300)" }))
2204
2629
  }),
2205
2630
  execute: async (params) => {
2206
2631
  const action = params.action;
@@ -2210,14 +2635,25 @@ function createWaitTool() {
2210
2635
  case "wait_for_selector": {
2211
2636
  const selector = params.selector;
2212
2637
  if (!selector) return { content: "缺少 selector 参数" };
2213
- await waitForSelector(selector, timeoutMs);
2214
- return { content: `元素 "${selector}" 已出现` };
2638
+ const state = params.state ?? "attached";
2639
+ if (![
2640
+ "attached",
2641
+ "visible",
2642
+ "hidden",
2643
+ "detached"
2644
+ ].includes(state)) return { content: `无效 state: ${state}` };
2645
+ const result = await waitForSelectorState(selector, state, timeoutMs);
2646
+ if (state === "attached" || state === "visible") {
2647
+ const tag = result.element?.tagName?.toLowerCase();
2648
+ return { content: `元素 "${selector}" 已达到状态 "${state}"${tag ? ` (${tag})` : ""}` };
2649
+ }
2650
+ return { content: `元素 "${selector}" 已达到状态 "${state}"` };
2215
2651
  }
2216
2652
  case "wait_for_hidden": {
2217
2653
  const selector = params.selector;
2218
2654
  if (!selector) return { content: "缺少 selector 参数" };
2219
- await waitForHidden(selector, timeoutMs);
2220
- return { content: `元素 "${selector}" 已消失` };
2655
+ await waitForSelectorState(selector, "hidden", timeoutMs);
2656
+ return { content: `元素 "${selector}" 已隐藏或消失` };
2221
2657
  }
2222
2658
  case "wait_for_text": {
2223
2659
  const text = params.text;
@@ -2225,6 +2661,11 @@ function createWaitTool() {
2225
2661
  await waitForText(text, timeoutMs);
2226
2662
  return { content: `文本 "${text}" 已出现` };
2227
2663
  }
2664
+ case "wait_for_stable": {
2665
+ const quietMs = Math.max(50, Math.floor(params.quietMs ?? 300));
2666
+ await waitForDomStable(timeoutMs, quietMs);
2667
+ return { content: `页面已稳定(静默窗口 ${quietMs}ms)` };
2668
+ }
2228
2669
  default: return { content: `未知的等待动作: ${action}` };
2229
2670
  }
2230
2671
  } catch (err) {