agentpage 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -216,7 +216,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
216
216
  activeInstruction
217
217
  ];
218
218
  if (currentUrl) parts.push("", `URL: ${currentUrl}`);
219
- if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
219
+ if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.", "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
220
220
  if (protocolViolationHint) parts.push("", protocolViolationHint);
221
221
  messages.push({
222
222
  role: "user",
@@ -252,6 +252,9 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
252
252
  "If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
253
253
  "Do NOT call page_info (get_url/get_title/query_all/snapshot).",
254
254
  "For dropdown/select fields, use dom with action=select_option (or fill on a select).",
255
+ "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.",
256
+ "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.",
257
+ "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.",
255
258
  allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content."
256
259
  ];
257
260
  if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
@@ -373,15 +376,21 @@ async function handleNavigationUrlChange(toolName, toolInput, result, registry,
373
376
  pageContext.latestSnapshot = await readPageSnapshot(registry);
374
377
  }
375
378
  }
376
- /** 只读工具集合(中)/ Read-only tool set (EN). */
379
+ /** 只读工具集合。 */
377
380
  const READ_ONLY_TOOLS = new Set(["page_info"]);
381
+ /** DOM 只读动作集合。 */
382
+ const READ_ONLY_DOM_ACTIONS = new Set(["get_text", "get_attr"]);
378
383
  /**
379
- * 空转检测(中)/ Detect idle loops dominated by read-only actions (EN).
384
+ * 空转检测:识别连续只读轮次并终止。
380
385
  * 返回 -1 表示应终止循环。
381
- * Returns -1 when loop should terminate.
382
386
  */
383
- function detectIdleLoop(toolCallNames, consecutiveReadOnlyRounds) {
384
- if (toolCallNames.every((name) => READ_ONLY_TOOLS.has(name))) {
387
+ function detectIdleLoop(toolCalls, consecutiveReadOnlyRounds) {
388
+ if (toolCalls.length > 0 && toolCalls.every(({ name, input }) => {
389
+ if (READ_ONLY_TOOLS.has(name)) return true;
390
+ if (name !== "dom") return false;
391
+ const action = getToolAction(input);
392
+ return Boolean(action && READ_ONLY_DOM_ACTIONS.has(action));
393
+ })) {
385
394
  const newCount = consecutiveReadOnlyRounds + 1;
386
395
  return newCount >= 2 ? -1 : newCount;
387
396
  }
@@ -391,10 +400,10 @@ function detectIdleLoop(toolCallNames, consecutiveReadOnlyRounds) {
391
400
  //#endregion
392
401
  //#region src/core/agent-loop/index.ts
393
402
  /**
394
- * Agent Loop 主流程(中)/ Core environment-agnostic agent loop (EN).
403
+ * Agent Loop 主流程
395
404
  *
396
405
  * 负责消息构建、AI 决策、工具执行、恢复保护与指标汇总。
397
- * Orchestrates message build, AI decisions, tool execution, recovery, and metrics.
406
+ *
398
407
  *
399
408
  * 流程图(文本):
400
409
  *
@@ -511,15 +520,18 @@ async function executeAgentLoop(params) {
511
520
  return (trimmed.split(/\n\s*\n/)[0]?.trim() ?? trimmed).slice(0, 220);
512
521
  };
513
522
  /**
514
- * 判定动作是否会触发 DOM 结构变化(中)/ Whether action may cause DOM-shape change (EN).
523
+ * 判定动作是否会触发 DOM 结构变化。
515
524
  *
516
525
  * 触发后应强制断轮,等待下一轮新快照继续。
517
- * Force round break after such action and continue with refreshed snapshot next round.
526
+ *
518
527
  */
519
528
  const shouldForceRoundBreak = (toolName, toolInput) => {
520
529
  const action = getToolAction(toolInput);
521
530
  if (toolName === "navigate") return action === "goto" || action === "back" || action === "forward" || action === "reload";
522
- if (toolName === "dom") return action === "click" || action === "press";
531
+ if (toolName === "dom") {
532
+ if (action === "press") return (typeof toolInput === "object" && toolInput !== null ? String(toolInput.key ?? toolInput.value ?? "") : "") === "Enter";
533
+ return false;
534
+ }
523
535
  if (toolName === "evaluate") return true;
524
536
  return false;
525
537
  };
@@ -721,9 +733,12 @@ async function executeAgentLoop(params) {
721
733
  lastRoundHadError = roundHasError;
722
734
  previousRoundTasks = buildTaskArray(executedTaskCalls);
723
735
  previousRoundPlannedTasks = plannedTasksCurrentRound;
724
- const idleResult = detectIdleLoop(executedTaskCalls.map((tc) => tc.name), consecutiveReadOnlyRounds);
736
+ const idleResult = detectIdleLoop(response.toolCalls.map((tc) => ({
737
+ name: tc.name,
738
+ input: tc.input
739
+ })), consecutiveReadOnlyRounds);
725
740
  if (idleResult === -1) {
726
- finalReply = response.text || "任务已完成。";
741
+ finalReply = response.text?.trim() || "任务已完成。";
727
742
  if (finalReply) callbacks?.onText?.(finalReply);
728
743
  break;
729
744
  }
@@ -1305,6 +1320,14 @@ var ToolRegistry = class {
1305
1320
  getDefinitions() {
1306
1321
  return Array.from(this.tools.values());
1307
1322
  }
1323
+ /** 按名称检查工具是否已注册。 */
1324
+ has(name) {
1325
+ return this.tools.has(name);
1326
+ }
1327
+ /** 按名称注销工具,返回是否删除成功。 */
1328
+ unregister(name) {
1329
+ return this.tools.delete(name);
1330
+ }
1308
1331
  /**
1309
1332
  * 根据工具名分发并执行工具调用。
1310
1333
  * - 找到工具 → 执行 execute() → 返回结果
@@ -1339,23 +1362,17 @@ var ToolRegistry = class {
1339
1362
 
1340
1363
  //#endregion
1341
1364
  //#region src/core/system-prompt.ts
1342
- /**
1343
- * 规范化额外指令(中)/ Normalize additional instructions (EN).
1344
- */
1365
+ /** 规范化额外指令。 */
1345
1366
  function normalizeExtraInstructions(input) {
1346
1367
  if (!input) return [];
1347
1368
  return (Array.isArray(input) ? input : [input]).map((s) => s.trim()).filter(Boolean);
1348
1369
  }
1349
1370
  /**
1350
- * 构建系统提示词(中)/ Build system prompt (EN).
1371
+ * 构建系统提示词。
1351
1372
  *
1352
1373
  * 约束:
1353
1374
  * - 输出给模型的提示词正文统一为英文。
1354
- * - 中文仅用于代码注释,便于团队维护。
1355
- *
1356
- * Constraints:
1357
- * - Prompt text sent to model stays English-only.
1358
- * - Chinese content is used in code comments only for maintainability.
1375
+ * - 中文仅用于源码注释,便于团队维护。
1359
1376
  */
1360
1377
  function buildSystemPrompt(params = {}) {
1361
1378
  const sections = [];
@@ -1371,14 +1388,17 @@ function buildSystemPrompt(params = {}) {
1371
1388
  "- Batch independent visible actions in one round. Do not split one form into many rounds unnecessarily.",
1372
1389
  "- Strict input order (MANDATORY): before every fill/type/select_option, click or focus the SAME target immediately in the SAME round.",
1373
1390
  "- Multi-field rule (MANDATORY): execute alternating pairs in one batch: focus/click field A -> fill/type A -> focus/click field B -> fill/type B.",
1391
+ "- Build the minimal action array from CURRENT snapshot to satisfy the target in one round whenever possible.",
1374
1392
  "- Do NOT run focus-only batches (e.g., focus A -> focus B). Each focused input/select target must be followed by its input/select action right away.",
1375
1393
  "- Fixed sequence examples: dom.focus(#name) -> dom.fill(#name, \"new-name\") -> dom.focus(#desc) -> dom.fill(#desc, \"new-desc\"); dom.click(#select) -> dom.select_option(#select, ...).",
1394
+ "- Deterministic delta rule: for increase/decrease steppers, compute target delta from visible current value and emit exactly |delta| clicks in one round (e.g., +2 => click increase twice). Never overshoot then undo.",
1376
1395
  "- For check/uncheck, target the real input control (checkbox/radio), not nearby text/container nodes.",
1377
1396
  "- Form batch rule: for one visible form, complete all independent fields in one round; do not fill one field then verify repeatedly.",
1378
1397
  "- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
1379
1398
  "- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
1380
1399
  "- For dropdown/select, use dom action=select_option (or fill on select).",
1381
1400
  "- Verification whitelist: do NOT use get_text/get_attr to verify input/select values unless the user explicitly asks for verification.",
1401
+ "- Stop rule: when the requested state is achieved, stop calling tools. If verification is requested, verify once and then return REMAINING: DONE (no repeated get_text/get_attr on the same target).",
1382
1402
  "- Do NOT interact with AutoPilot UI unless user explicitly asks.",
1383
1403
  "",
1384
1404
  "## Output Contract",
@@ -1410,41 +1430,93 @@ function buildSystemPrompt(params = {}) {
1410
1430
  //#endregion
1411
1431
  //#region src/web/tools/dom-tool.ts
1412
1432
  /**
1413
- * DOM Tool — 基于 Web API DOM 操作工具。
1433
+ * DOM Tool — 浏览器 DOM 操作工具(结合 Playwright 核心交互模式增强)。
1414
1434
  *
1415
- * 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
1416
- * 运行环境:浏览器 Content Script。
1435
+ * 关键改进(参考 Playwright):
1436
+ * 1. retarget — 点击时自动重定向到 button/link/label.control
1437
+ * 2. scrollIntoView 多策略 — 4 种 block 对齐轮换,解决 sticky 遮挡
1438
+ * 3. stable 检查 — rAF 逐帧检测元素位置稳定后再操作
1439
+ * 4. hit-target 验证 — elementsFromPoint 检查是否被遮挡
1440
+ * 5. 完整点击事件链 — pointermove→pointerdown→mousedown→pointerup→mouseup→click
1441
+ * 6. check/uncheck 通过 click — 先检查→click 切换→验证状态
1442
+ * 7. press 组合键 — 支持 Control+a, Shift+Enter 等修饰键
1443
+ * 8. fill 分类型 — date/color/range 走 setValue,text 类走 selectAll+原生写入
1444
+ * 9. 自定义下拉增强 — 更广泛的 option 选择器 + 等待弹出
1445
+ * 10. ARIA disabled — 检查祖先链 aria-disabled
1417
1446
  *
1418
- * 支持 15 种动作:
1419
- * click — 点击元素
1420
- * fill — 填写可编辑控件(input/textarea/select/contenteditable)
1421
- * select_option — 选择下拉框选项(value/label)
1422
- * clear — 清空输入控件
1423
- * check — 勾选 checkbox/radio
1424
- * uncheck — 取消勾选 checkbox
1425
- * type — 逐字符模拟键入
1426
- * focus — 聚焦元素
1427
- * hover — 鼠标悬停(触发 mouseenter/mouseover)
1428
- * press — 按下键盘按键(Enter/Escape/Tab/ArrowDown 等)
1429
- * get_text — 获取元素文本内容
1430
- * get_attr — 获取元素属性值
1431
- * set_attr — 设置元素属性
1432
- * add_class — 添加 CSS 类名
1433
- * remove_class — 移除 CSS 类名
1447
+ * 运行环境:浏览器 Content Script(直接访问 DOM,无 CDP)。
1434
1448
  */
1435
- const DEFAULT_WAIT_MS = 1e3;
1436
- /** 当前活跃的 RefStore 实例(由 WebAgent 在 chat() 时设置) */
1449
+ const DEFAULT_WAIT_MS = 2e3;
1450
+ /** scrollIntoView 轮换策略(参考 Playwright dom.ts) */
1451
+ const SCROLL_OPTIONS = [
1452
+ void 0,
1453
+ {
1454
+ block: "end",
1455
+ inline: "end"
1456
+ },
1457
+ {
1458
+ block: "center",
1459
+ inline: "center"
1460
+ },
1461
+ {
1462
+ block: "start",
1463
+ inline: "start"
1464
+ }
1465
+ ];
1466
+ /** fill 时直接 setValue 的 input 类型(参考 Playwright kInputTypesToSetValue) */
1467
+ const INPUT_SET_VALUE_TYPES = new Set([
1468
+ "color",
1469
+ "date",
1470
+ "time",
1471
+ "datetime-local",
1472
+ "month",
1473
+ "range",
1474
+ "week"
1475
+ ]);
1476
+ /** 不可 fill 的 input 类型 */
1477
+ const INPUT_BLOCKED_TYPES = new Set([
1478
+ "checkbox",
1479
+ "radio",
1480
+ "file",
1481
+ "button",
1482
+ "submit",
1483
+ "reset",
1484
+ "image"
1485
+ ]);
1486
+ /** 键名→code 映射 */
1487
+ const KEY_CODE_MAP = {
1488
+ Enter: "Enter",
1489
+ Escape: "Escape",
1490
+ Esc: "Escape",
1491
+ Tab: "Tab",
1492
+ Space: "Space",
1493
+ " ": "Space",
1494
+ Backspace: "Backspace",
1495
+ Delete: "Delete",
1496
+ ArrowUp: "ArrowUp",
1497
+ ArrowDown: "ArrowDown",
1498
+ ArrowLeft: "ArrowLeft",
1499
+ ArrowRight: "ArrowRight",
1500
+ Home: "Home",
1501
+ End: "End",
1502
+ PageUp: "PageUp",
1503
+ PageDown: "PageDown",
1504
+ Control: "ControlLeft",
1505
+ Shift: "ShiftLeft",
1506
+ Alt: "AltLeft",
1507
+ Meta: "MetaLeft"
1508
+ };
1437
1509
  let activeRefStore;
1510
+ function setActiveRefStore(store) {
1511
+ activeRefStore = store;
1512
+ }
1513
+ function getActiveRefStore() {
1514
+ return activeRefStore;
1515
+ }
1438
1516
  function sleep(ms) {
1439
- return new Promise((resolve) => setTimeout(resolve, ms));
1517
+ return new Promise((r) => setTimeout(r, ms));
1440
1518
  }
1441
- /**
1442
- * 安全地查询 DOM 元素。
1443
- *
1444
- * 支持两种定位方式(优先级从高到低):
1445
- * - hash ID(以 "#" 开头且在 RefStore 中存在):确定性 hash 查找(最高效)
1446
- * - CSS 选择器(其他):传统 querySelector
1447
- */
1519
+ /** 查询元素:优先 RefStore hash,回退 CSS 选择器 */
1448
1520
  function queryElement(selector) {
1449
1521
  try {
1450
1522
  if (selector.startsWith("#") && activeRefStore) {
@@ -1462,28 +1534,13 @@ function queryElement(selector) {
1462
1534
  return `选择器语法错误: ${selector}`;
1463
1535
  }
1464
1536
  }
1465
- /**
1466
- * 设置当前活跃的 RefStore(由 WebAgent 在 chat 开始时调用)。
1467
- */
1468
- function setActiveRefStore(store) {
1469
- activeRefStore = store;
1470
- }
1471
- /** 获取当前活跃的 RefStore(供其他工具复用) */
1472
- function getActiveRefStore() {
1473
- return activeRefStore;
1474
- }
1475
- /**
1476
- * 在给定超时时间内轮询查找元素。
1477
- * - 返回 Element:找到元素
1478
- * - 返回 string:选择器语法错误
1479
- * - 返回 null:超时未找到
1480
- */
1537
+ /** 轮询等待元素出现 */
1481
1538
  async function waitForElement(selector, timeoutMs) {
1482
1539
  const start = Date.now();
1483
1540
  while (Date.now() - start <= timeoutMs) {
1484
- const elOrError = queryElement(selector);
1485
- if (typeof elOrError !== "string") return elOrError;
1486
- if (elOrError.startsWith("选择器语法错误")) return elOrError;
1541
+ const r = queryElement(selector);
1542
+ if (typeof r !== "string") return r;
1543
+ if (r.startsWith("选择器语法错误")) return r;
1487
1544
  await sleep(100);
1488
1545
  }
1489
1546
  return null;
@@ -1495,166 +1552,128 @@ function resolveWaitMs(params) {
1495
1552
  if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
1496
1553
  return DEFAULT_WAIT_MS;
1497
1554
  }
1498
- /**
1499
- * 模拟真实用户输入:触发 input、change 事件,兼容 React/Vue 等框架。
1500
- */
1501
- function dispatchInputEvents(el) {
1502
- try {
1503
- el.dispatchEvent(new InputEvent("input", {
1504
- bubbles: true,
1505
- cancelable: true,
1506
- inputType: "insertText",
1507
- data: null
1508
- }));
1509
- } catch {
1510
- el.dispatchEvent(new Event("input", {
1511
- bubbles: true,
1512
- cancelable: true
1513
- }));
1514
- }
1515
- el.dispatchEvent(new Event("change", {
1516
- bubbles: true,
1517
- cancelable: true
1518
- }));
1519
- }
1520
- /**
1521
- * 使用原生 setter 写入表单值,提升对受控组件(React/Vue 等)的兼容性。
1522
- */
1523
- function setNativeEditableValue(el, value) {
1524
- const proto = el instanceof HTMLInputElement ? HTMLInputElement.prototype : el instanceof HTMLTextAreaElement ? HTMLTextAreaElement.prototype : HTMLSelectElement.prototype;
1525
- const descriptor = Object.getOwnPropertyDescriptor(proto, "value");
1526
- if (descriptor?.set) {
1527
- descriptor.set.call(el, value);
1528
- return;
1555
+ /** 检查元素样式可见性(处理 checkVisibility / details 折叠 / visibility) */
1556
+ function isStyleVisible(el, style) {
1557
+ style = style ?? window.getComputedStyle(el);
1558
+ if (typeof el.checkVisibility === "function") {
1559
+ if (!el.checkVisibility()) return false;
1560
+ } else {
1561
+ const det = el.closest("details,summary");
1562
+ if (det !== el && det?.nodeName === "DETAILS" && !det.open) return false;
1529
1563
  }
1530
- el.value = value;
1531
- }
1532
- /**
1533
- * 读取可编辑元素当前值。
1534
- */
1535
- function getEditableValue(el) {
1536
- return el.value ?? "";
1537
- }
1538
- /**
1539
- * 将常见 key 映射为更接近浏览器语义的 KeyboardEvent.code。
1540
- */
1541
- function resolveKeyboardCode(key) {
1542
- return {
1543
- Enter: "Enter",
1544
- Escape: "Escape",
1545
- Esc: "Escape",
1546
- Tab: "Tab",
1547
- Space: "Space",
1548
- " ": "Space",
1549
- Backspace: "Backspace",
1550
- Delete: "Delete",
1551
- ArrowUp: "ArrowUp",
1552
- ArrowDown: "ArrowDown",
1553
- ArrowLeft: "ArrowLeft",
1554
- ArrowRight: "ArrowRight"
1555
- }[key] ?? key;
1564
+ return style.visibility === "visible";
1556
1565
  }
1557
1566
  /**
1558
- * 生成元素的可读描述,用于在操作结果中展示实际命中的 DOM 节点。
1559
- * 格式:<tag#id.class> "文本" [attr=val, ...]
1567
+ * 元素可见性检查(参考 Playwright isElementVisible+computeBox)。
1568
+ * 处理 display:contents / display:none / visibility / opacity / 尺寸为 0。
1560
1569
  */
1561
- function describeElement(el) {
1562
- const tag = el.tagName.toLowerCase();
1563
- const id = el.id ? `#${el.id}` : "";
1564
- const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
1565
- const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
1566
- const textHint = text ? ` "${text}"` : "";
1567
- const hints = [];
1568
- for (const attr of [
1569
- "type",
1570
- "name",
1571
- "placeholder",
1572
- "href",
1573
- "role"
1574
- ]) {
1575
- const val = el.getAttribute(attr);
1576
- if (val) hints.push(`${attr}=${val}`);
1577
- }
1578
- if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
1579
- return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
1580
- }
1581
1570
  function isElementVisible(el) {
1582
1571
  if (!(el instanceof HTMLElement || el instanceof SVGElement)) return false;
1583
1572
  if (!el.isConnected) return false;
1584
1573
  const style = window.getComputedStyle(el);
1585
- if (style.display === "none" || style.visibility === "hidden") return false;
1574
+ if (style.display === "contents") {
1575
+ for (let child = el.firstChild; child; child = child.nextSibling) {
1576
+ if (child.nodeType === Node.ELEMENT_NODE && isElementVisible(child)) return true;
1577
+ if (child.nodeType === Node.TEXT_NODE) {
1578
+ const range = document.createRange();
1579
+ range.selectNodeContents(child);
1580
+ const rects = range.getClientRects();
1581
+ for (let i = 0; i < rects.length; i++) if (rects[i].width > 0 && rects[i].height > 0) return true;
1582
+ }
1583
+ }
1584
+ return false;
1585
+ }
1586
+ if (style.display === "none") return false;
1587
+ if (!isStyleVisible(el, style)) return false;
1586
1588
  if (style.opacity === "0") return false;
1587
1589
  const rect = el.getBoundingClientRect();
1588
1590
  return rect.width > 0 && rect.height > 0;
1589
1591
  }
1592
+ /** ARIA disabled:检查元素自身 + 祖先链 aria-disabled(参考 Playwright getAriaDisabled) */
1590
1593
  function isElementDisabled(el) {
1591
- if (!(el instanceof HTMLElement)) return false;
1592
- if (el.hasAttribute("disabled")) return true;
1593
- if (el.getAttribute("aria-disabled") === "true") return true;
1594
- if ("disabled" in el && typeof el.disabled === "boolean") return Boolean(el.disabled);
1594
+ if (el instanceof HTMLButtonElement || el instanceof HTMLInputElement || el instanceof HTMLSelectElement || el instanceof HTMLTextAreaElement) {
1595
+ if (el.disabled) return true;
1596
+ }
1597
+ let cursor = el;
1598
+ while (cursor) {
1599
+ if (cursor.getAttribute("aria-disabled") === "true") return true;
1600
+ cursor = cursor.parentElement;
1601
+ }
1595
1602
  return false;
1596
1603
  }
1597
1604
  function isEditableElement(el) {
1598
1605
  if (el instanceof HTMLTextAreaElement) return !el.readOnly;
1599
- if (el instanceof HTMLInputElement) return !new Set([
1600
- "checkbox",
1601
- "radio",
1602
- "file",
1603
- "button",
1604
- "submit",
1605
- "reset"
1606
- ]).has(el.type) && !el.readOnly;
1606
+ if (el instanceof HTMLInputElement) return !INPUT_BLOCKED_TYPES.has(el.type) && !el.readOnly;
1607
1607
  if (el instanceof HTMLSelectElement) return true;
1608
1608
  return el instanceof HTMLElement && el.isContentEditable;
1609
1609
  }
1610
- function isCheckableInput(el) {
1611
- return el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio");
1612
- }
1613
- function findCheckableIn(el) {
1614
- if (!el) return null;
1615
- const found = el.querySelector("input[type=\"checkbox\"], input[type=\"radio\"]");
1616
- return isCheckableInput(found) ? found : null;
1610
+ /** rAF 逐帧检查元素位置是否连续 3 帧不变 */
1611
+ function checkElementStable(el, timeoutMs = 800) {
1612
+ return new Promise((resolve) => {
1613
+ let lastRect;
1614
+ let stableCount = 0;
1615
+ const start = performance.now();
1616
+ function check() {
1617
+ if (performance.now() - start > timeoutMs || !el.isConnected) {
1618
+ resolve(false);
1619
+ return;
1620
+ }
1621
+ const rect = el.getBoundingClientRect();
1622
+ if (lastRect) {
1623
+ if (!(rect.x === lastRect.x && rect.y === lastRect.y && rect.width === lastRect.width && rect.height === lastRect.height)) stableCount = 0;
1624
+ else if (++stableCount >= 3) {
1625
+ resolve(true);
1626
+ return;
1627
+ }
1628
+ }
1629
+ lastRect = rect;
1630
+ requestAnimationFrame(check);
1631
+ }
1632
+ requestAnimationFrame(check);
1633
+ });
1617
1634
  }
1618
1635
  /**
1619
- * 归一化 check/uncheck 目标:
1620
- * 允许模型命中文本容器/label/div,再回溯到关联 checkbox/radio,
1621
- * 以降低快照剪枝导致的“命中语义节点而非真实控件”失败率。
1636
+ * 将目标重定向到关联的交互控件。
1637
+ * - button-link:非交互元素→最近 button/[role=button]/a/[role=link]
1638
+ * - follow-label:label→control + 非交互→button/[role=button]/[role=checkbox]/[role=radio]
1622
1639
  */
1623
- function resolveCheckableTarget(el) {
1624
- if (isCheckableInput(el)) return el;
1625
- if (el instanceof HTMLLabelElement) {
1626
- const byLabel = findCheckableIn(el);
1627
- if (byLabel) return byLabel;
1628
- const htmlFor = el.htmlFor?.trim();
1629
- if (htmlFor) {
1630
- const byFor = document.getElementById(htmlFor);
1631
- if (isCheckableInput(byFor)) return byFor;
1640
+ function retarget(el, mode) {
1641
+ if (mode === "none") return el;
1642
+ if (!el.matches("input, textarea, select") && !el.isContentEditable) if (mode === "button-link") el = el.closest("button, [role=button], a, [role=link]") || el;
1643
+ else el = el.closest("button, [role=button], [role=checkbox], [role=radio]") || el;
1644
+ if (mode === "follow-label") {
1645
+ if (!el.matches("a, input, textarea, button, select, [role=link], [role=button], [role=checkbox], [role=radio]") && !el.isContentEditable) {
1646
+ const label = el.closest("label");
1647
+ if (label?.control) el = label.control;
1632
1648
  }
1633
1649
  }
1634
- if (el instanceof HTMLElement) {
1635
- const ownerLabel = el.closest("label");
1636
- if (ownerLabel) {
1637
- const byOwnerLabel = findCheckableIn(ownerLabel);
1638
- if (byOwnerLabel) return byOwnerLabel;
1639
- const htmlFor = ownerLabel.htmlFor?.trim();
1640
- if (htmlFor) {
1641
- const byFor = document.getElementById(htmlFor);
1642
- if (isCheckableInput(byFor)) return byFor;
1643
- }
1644
- }
1645
- const inSelf = findCheckableIn(el);
1646
- if (inSelf) return inSelf;
1647
- const prev = el.previousElementSibling;
1648
- if (isCheckableInput(prev)) return prev;
1649
- const next = el.nextElementSibling;
1650
- if (isCheckableInput(next)) return next;
1651
- const parent = el.parentElement;
1652
- const inParent = findCheckableIn(parent);
1653
- if (inParent) return inParent;
1650
+ return el;
1651
+ }
1652
+ function scrollIntoViewIfNeeded(el, retry = 0) {
1653
+ if (retry === 0 && "scrollIntoViewIfNeeded" in el) {
1654
+ el.scrollIntoViewIfNeeded(true);
1655
+ return;
1654
1656
  }
1655
- return null;
1657
+ const opts = SCROLL_OPTIONS[retry % SCROLL_OPTIONS.length];
1658
+ el.scrollIntoView(opts ?? {
1659
+ block: "center",
1660
+ inline: "nearest"
1661
+ });
1656
1662
  }
1657
- function ensureActionable(el, action, selector) {
1663
+ /** 检查元素中心点是否被遮挡,返回遮挡元素描述或 null */
1664
+ function checkHitTarget(el) {
1665
+ const rect = el.getBoundingClientRect();
1666
+ const x = rect.left + rect.width / 2;
1667
+ const y = rect.top + rect.height / 2;
1668
+ const topEl = document.elementFromPoint(x, y);
1669
+ if (!topEl) return null;
1670
+ if (topEl === el || el.contains(topEl) || topEl.contains(el)) return null;
1671
+ const sharedLabel = topEl.closest("label");
1672
+ if (sharedLabel && sharedLabel.contains(el)) return null;
1673
+ return describeElement(topEl);
1674
+ }
1675
+ function ensureActionable(el, action, selector, force) {
1676
+ if (force) return null;
1658
1677
  if (!el.isConnected) return {
1659
1678
  content: `"${selector}" 元素已脱离文档,无法执行 ${action}`,
1660
1679
  details: {
@@ -1683,7 +1702,7 @@ function ensureActionable(el, action, selector) {
1683
1702
  "check",
1684
1703
  "uncheck"
1685
1704
  ]).has(action) && isElementDisabled(el)) return {
1686
- content: `"${selector}" 元素已禁用,无法执行 ${action}`,
1705
+ content: `"${selector}" 元素已禁用(disabled/aria-disabled),无法执行 ${action}`,
1687
1706
  details: {
1688
1707
  error: true,
1689
1708
  code: "ELEMENT_DISABLED",
@@ -1706,25 +1725,289 @@ function ensureActionable(el, action, selector) {
1706
1725
  };
1707
1726
  return null;
1708
1727
  }
1709
- function isOptionCandidateVisible(el) {
1710
- if (!(el instanceof HTMLElement)) return false;
1711
- if (!isElementVisible(el)) return false;
1712
- return (el.textContent?.trim() ?? "").length > 0;
1728
/**
 * Returns the center of the element's bounding client rect.
 *
 * @param {Element} el - Element to measure.
 * @returns {{x: number, y: number}} Center coordinates taken from `getBoundingClientRect()`.
 */
function getClickPoint(el) {
  const { left, top, width, height } = el.getBoundingClientRect();
  const x = left + width / 2;
  const y = top + height / 2;
  return { x, y };
}
1735
/**
 * Dispatches a full synthetic click sequence on an element (the original
 * comment notes it is modeled on Playwright's Mouse.click):
 * pointermove → mousemove → (per click) pointerdown → mousedown → focus →
 * pointerup → mouseup → click.
 *
 * All events are aimed at the element's center (via `getClickPoint`). The
 * element is focused once, during the first click, if it is not already the
 * active element.
 *
 * @param {HTMLElement} el - Target element; must support `dispatchEvent` and `focus`.
 * @param {number} [clickCount=1] - Number of clicks (2 = double-click, ...).
 *   Non-finite, fractional or non-positive values are normalized to a whole
 *   number of at least 1, so a reported click always dispatches a click.
 */
function dispatchClickEvents(el, clickCount = 1) {
  const { x, y } = getClickPoint(el);
  // Guard against 0 / negative / NaN / fractional counts coming from tool params.
  const clicks = Number.isFinite(clickCount) ? Math.max(1, Math.floor(clickCount)) : 1;
  const base = {
    bubbles: true,
    cancelable: true,
    view: window,
    clientX: x,
    clientY: y,
    button: 0
  };
  // Identify the synthetic pointer as the primary mouse pointer; the PointerEvent
  // defaults (pointerType "" / isPrimary false) do not describe a mouse.
  const pointer = {
    pointerId: 1,
    pointerType: "mouse",
    isPrimary: true
  };
  el.dispatchEvent(new PointerEvent("pointermove", { ...base, ...pointer }));
  el.dispatchEvent(new MouseEvent("mousemove", base));
  for (let cc = 1; cc <= clicks; cc++) {
    // `detail` carries the running click count; `buttons: 1` only while pressed.
    const pressed = { ...base, detail: cc, buttons: 1 };
    const released = { ...base, detail: cc };
    el.dispatchEvent(new PointerEvent("pointerdown", { ...pressed, ...pointer }));
    el.dispatchEvent(new MouseEvent("mousedown", pressed));
    if (cc === 1 && el !== document.activeElement) el.focus({ preventScroll: true });
    el.dispatchEvent(new PointerEvent("pointerup", { ...released, ...pointer }));
    el.dispatchEvent(new MouseEvent("mouseup", released));
    el.dispatchEvent(new MouseEvent("click", released));
  }
}
1782
/**
 * Dispatches a synthetic hover sequence at the element's center (via `getClickPoint`).
 *
 * Events are fired in the order a browser uses when the mouse moves onto an
 * element: pointerover → pointerenter → mouseover → mouseenter → pointermove →
 * mousemove. The enter events do not bubble; all others do.
 *
 * @param {Element} el - Target element; must support `dispatchEvent`.
 */
function dispatchHoverEvents(el) {
  const { x, y } = getClickPoint(el);
  const base = {
    bubbles: true,
    cancelable: true,
    view: window,
    clientX: x,
    clientY: y
  };
  // Same synthetic primary mouse pointer for every pointer event in the chain.
  const pointer = {
    pointerId: 1,
    pointerType: "mouse",
    isPrimary: true
  };
  el.dispatchEvent(new PointerEvent("pointerover", { ...base, ...pointer }));
  el.dispatchEvent(new PointerEvent("pointerenter", { ...base, ...pointer, bubbles: false }));
  el.dispatchEvent(new MouseEvent("mouseover", base));
  el.dispatchEvent(new MouseEvent("mouseenter", { ...base, bubbles: false }));
  el.dispatchEvent(new PointerEvent("pointermove", { ...base, ...pointer }));
  el.dispatchEvent(new MouseEvent("mousemove", base));
}
1807
/**
 * Dispatches an `input` event followed by a `change` event on the element
 * (the original comment notes this is for React/Vue controlled components).
 *
 * Both events bubble; only the `input` event is `composed`.
 *
 * @param {EventTarget} el - Element to notify.
 */
function dispatchInputEvents(el) {
  const sequence = [
    ["input", { bubbles: true, composed: true }],
    ["change", { bubbles: true }]
  ];
  for (const [type, init] of sequence) el.dispatchEvent(new Event(type, init));
}
1815
/**
 * Writes a form value through the native prototype `value` setter, bypassing
 * any instance-level getter/setter a framework (React/Vue) installed on the element.
 *
 * The setter is taken from the prototype that matches the element
 * (`HTMLInputElement`, `HTMLTextAreaElement` or `HTMLSelectElement`). For any
 * other element, or when no native setter exists, the value is assigned
 * directly, so a native setter is never invoked on an element of the wrong type.
 *
 * @param {Element} el - Element whose value is written.
 * @param {string} value - Value to write.
 */
function setNativeValue(el, value) {
  const owner = [HTMLInputElement, HTMLTextAreaElement, HTMLSelectElement].find((ctor) => el instanceof ctor);
  const setter = owner ? Object.getOwnPropertyDescriptor(owner.prototype, "value")?.set : void 0;
  if (setter) setter.call(el, value);
  else el.value = value;
}
1822
/**
 * Selects the whole content of an element and then focuses it.
 *
 * - `<input>`: uses `select()`.
 * - `<textarea>`: sets the selection range to cover the entire value.
 * - anything else: selects the node contents through the window selection
 *   (when one is available); focus is applied only to `HTMLElement`s.
 *
 * @param {Element} el - Element whose content should be selected.
 */
function selectText(el) {
  if (el instanceof HTMLInputElement) {
    el.select();
  } else if (el instanceof HTMLTextAreaElement) {
    el.selectionStart = 0;
    el.selectionEnd = el.value.length;
  } else {
    const contents = document.createRange();
    contents.selectNodeContents(el);
    const selection = window.getSelection();
    if (selection) {
      selection.removeAllRanges();
      selection.addRange(contents);
    }
    // Non-HTMLElement nodes get a selection but no focus call.
    if (!(el instanceof HTMLElement)) return;
  }
  el.focus();
}
1843
/**
 * Splits a key-combo string such as `"Control+a"` into its key tokens.
 *
 * `+` is the separator. An empty token that is followed by another token
 * stands for a literal `+` key and is merged into that following token, so
 * `"Control++"` yields `["Control", "+"]` and `"+"` yields `["+"]`.
 *
 * @param {string} key - Combo string, e.g. `"Enter"`, `"Shift+Enter"`.
 * @returns {string[]} Tokens in order (modifiers first, main key last);
 *   an empty array for a non-string or empty input.
 */
function splitKeyCombo(key) {
  // Tool params are untyped at runtime: return no tokens instead of throwing on `.split`.
  if (typeof key !== "string" || key === "") return [];
  const tokens = key.split("+");
  for (let i = 0; i < tokens.length; i++) {
    if (tokens[i] === "" && i + 1 < tokens.length) {
      // Empty slot produced by a literal "+": fold it into the next token.
      tokens[i + 1] = `+${tokens[i + 1]}`;
      tokens.splice(i, 1);
    }
  }
  return tokens.filter(Boolean);
}
1851
/**
 * Resolves the `KeyboardEvent.code` value to use for a key name.
 *
 * Lookup order: an entry in `KEY_CODE_MAP`; then `KeyX` for a single ASCII
 * letter; then `DigitN` for a single digit; otherwise the key name itself.
 *
 * @param {string} key - Key name, e.g. `"Enter"`, `"a"`, `"7"`.
 * @returns {string} Code string for the synthetic keyboard event.
 */
function resolveKeyCode(key) {
  const mapped = KEY_CODE_MAP[key];
  if (mapped !== void 0 && mapped !== null) return mapped;
  // Only letters have "Key*" codes; digits use "Digit*" ("Key1" is not a valid code).
  if (/^[a-z]$/i.test(key)) return `Key${key.toUpperCase()}`;
  if (/^[0-9]$/.test(key)) return `Digit${key}`;
  return key;
}
1854
/**
 * Performs a key press on an element (the original comment notes the order is
 * modeled on Playwright): modifiers go down in the given order, then the main
 * key goes down/up, then modifiers are released in reverse order.
 *
 * Modifier flags (`ctrlKey`/`shiftKey`/`altKey`/`metaKey`) follow the keys
 * actually held at each step: a flag turns on with that modifier's keydown and
 * off with its keyup. `keypress` is dispatched only when the main keydown was
 * not cancelled, the main key is a single character, and no Control/Alt/Meta
 * modifier is held (Shift alone still allows it).
 *
 * @param {EventTarget} el - Element receiving the keyboard events.
 * @param {string} key - Key or combo, e.g. `"Enter"`, `"Control+a"`, `"Shift+Enter"`.
 * @throws {TypeError} When `key` contains no key token.
 */
function executePress(el, key) {
  const tokens = splitKeyCombo(key);
  // Fail with a clear message instead of crashing on an undefined main key.
  if (tokens.length === 0) throw new TypeError(`executePress: empty key combo ${JSON.stringify(key)}`);
  const mainKey = tokens[tokens.length - 1];
  const mods = tokens.slice(0, -1);
  const modifierFlags = new Map([
    ["Control", "ctrlKey"],
    ["Shift", "shiftKey"],
    ["Alt", "altKey"],
    ["Meta", "metaKey"]
  ]);
  const held = {
    ctrlKey: false,
    shiftKey: false,
    altKey: false,
    metaKey: false
  };
  // Dispatches one keyboard event carrying the modifier state held right now.
  const fire = (type, name) => el.dispatchEvent(new KeyboardEvent(type, {
    key: name,
    code: resolveKeyCode(name),
    bubbles: true,
    cancelable: true,
    ...held
  }));
  for (const m of mods) {
    const flag = modifierFlags.get(m);
    if (flag) held[flag] = true;
    fire("keydown", m);
  }
  const hasNonShiftMod = held.ctrlKey || held.altKey || held.metaKey;
  const keydownAllowed = fire("keydown", mainKey);
  if (keydownAllowed && mainKey.length === 1 && !hasNonShiftMod) fire("keypress", mainKey);
  fire("keyup", mainKey);
  for (let i = mods.length - 1; i >= 0; i--) {
    const flag = modifierFlags.get(mods[i]);
    if (flag) held[flag] = false;
    fire("keyup", mods[i]);
  }
}
1904
/**
 * Builds a short one-line description of an element, shaped like
 * `<tag#id.cls1.cls2> "text" [attr=value, ...]`.
 *
 * - At most the first three class names are included (only when `className` is a string).
 * - Text is the trimmed `textContent`, cut to 40 characters; for a `<select>`
 *   it is the text of the first selected option instead.
 * - Hints list the non-empty `type`, `name`, `placeholder`, `href` and `role`
 *   attributes, plus `val=<value>` for a `<select>` with a non-empty value.
 *
 * @param {Element} el - Element to describe.
 * @returns {string} The description string.
 */
function describeElement(el) {
  const isSelect = el instanceof HTMLSelectElement;
  const tag = el.tagName.toLowerCase();
  const idPart = el.id ? `#${el.id}` : "";

  let classPart = "";
  if (el.className && typeof el.className === "string") {
    const names = el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3);
    classPart = names.map((c) => `.${c}`).join("");
  }

  const rawText = isSelect ? el.selectedOptions[0]?.textContent : el.textContent;
  const text = rawText?.trim().slice(0, 40) ?? "";
  const textPart = text ? ` "${text}"` : "";

  const hints = [];
  ["type", "name", "placeholder", "href", "role"].forEach((attr) => {
    const v = el.getAttribute(attr);
    if (v) hints.push(`${attr}=${v}`);
  });
  if (isSelect && el.value) hints.push(`val=${el.value}`);
  const hintPart = hints.length > 0 ? ` [${hints.join(", ")}]` : "";

  return `<${tag}${idPart}${classPart}>${textPart}${hintPart}`;
}
1924
/**
 * Reads the checked state of a checkable element.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean|"error"} `el.checked` for a native checkbox/radio input;
 *   whether `aria-checked` equals `"true"` for an element with role
 *   `checkbox`, `radio` or `switch`; the sentinel string `"error"` for
 *   anything else.
 */
function getChecked(el) {
  const isNativeCheckable = el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio");
  if (isNativeCheckable) return el.checked;
  switch (el.getAttribute("role")) {
    case "checkbox":
    case "radio":
    case "switch":
      return el.getAttribute("aria-checked") === "true";
    default:
      return "error";
  }
}
1930
/**
 * Normalizes the target of a check/uncheck action: when the given element is a
 * text container, label or wrapper, resolves it to the associated checkable
 * control (an element for which `getChecked` does not return `"error"`).
 *
 * Candidates are tried in this order and the first checkable one wins:
 * the element itself, its own label control, the control of the closest
 * `<label>`, the first checkable descendant, the previous sibling, the next
 * sibling, and the first checkable element inside the parent.
 *
 * @param {Element} el - Element matched by the selector.
 * @returns {Element} The resolved checkable element, or `el` when none is found.
 */
function resolveCheckableTarget(el) {
  const checkableSelector = "input[type=\"checkbox\"], input[type=\"radio\"], [role=\"checkbox\"], [role=\"radio\"], [role=\"switch\"]";
  // Lazy lookups so each DOM query only runs if the earlier candidates failed.
  const candidates = [
    () => el,
    () => (el instanceof HTMLLabelElement ? el.control : null),
    () => el.closest("label")?.control,
    () => el.querySelector(checkableSelector),
    () => el.previousElementSibling,
    () => el.nextElementSibling,
    () => el.parentElement?.querySelector(checkableSelector)
  ];
  for (const pick of candidates) {
    const candidate = pick();
    if (candidate && getChecked(candidate) !== "error") return candidate;
  }
  return el;
}
1951
/**
 * Resolves a clickable proxy for pointer-style actions (click/check/uncheck).
 *
 * Only applies to `<input>` elements that are a checkbox, a radio, or carry
 * `role="switch"`, and that are not visible. For those, the first visible
 * element among the following is returned: the associated label, the closest
 * toggle wrapper (Element Plus classes or ARIA toggle roles), or a matching
 * visual proxy inside the parent element. Everything else is returned as is.
 *
 * @param {Element} el - Element the action was aimed at.
 * @returns {Element} The element that should receive the pointer events.
 */
function resolvePointerActionTarget(el) {
  if (!(el instanceof HTMLInputElement)) return el;
  const kind = el.type?.toLowerCase() ?? "";
  const isToggle = kind === "checkbox" || kind === "radio" || el.getAttribute("role") === "switch";
  if (!isToggle || isElementVisible(el)) return el;
  // Lazy lookups, tried in priority order.
  const lookups = [
    () => el.labels?.[0] ?? el.closest("label"),
    () => el.closest(".el-switch, .el-checkbox, .el-radio, [role='switch'], [role='checkbox'], [role='radio']"),
    () => el.parentElement?.querySelector(".el-switch__core, .el-checkbox__inner, .el-radio__inner, [role='switch'], [role='checkbox'], [role='radio']")
  ];
  for (const find of lookups) {
    const candidate = find();
    if (candidate && isElementVisible(candidate)) return candidate;
  }
  return el;
}
1968
/**
 * When the target is a form-item caption (a `<label>` or an Element Plus
 * `el-form-item__label`), redirects it to an interactive control of the same
 * form item.
 *
 * Resolution order: the label's own visible `control`; otherwise the first
 * matching control inside the enclosing `.el-form-item` (searched in its
 * `.el-form-item__content` when present), provided it is visible. In every
 * other case the original element is returned.
 *
 * @param {Element} el - Element the action was aimed at.
 * @returns {Element} The control to act on, or `el` itself.
 */
function resolveFormItemControlTarget(el) {
  if (!(el instanceof HTMLElement)) return el;
  const isCaption = el.tagName === "LABEL" || el.classList.contains("el-form-item__label");
  if (!isCaption) return el;

  const linked = el.control;
  if (linked && isElementVisible(linked)) return linked;

  const formItem = el.closest(".el-form-item");
  if (!formItem) return el;

  const scope = formItem.querySelector(".el-form-item__content") ?? formItem;
  const control = scope.querySelector("input:not([type='hidden']), textarea, select, button, [role='switch'], [role='checkbox'], [role='radio'], [role='button'], .el-switch, .el-checkbox, .el-radio, [tabindex]:not([tabindex='-1'])");
  return control && isElementVisible(control) ? control : el;
}
1714
1983
  function findVisibleOptionByText(text) {
1715
1984
  const target = text.trim().toLowerCase();
1716
1985
  if (!target) return null;
1717
- const nodes = Array.from(document.querySelectorAll("[role=\"option\"], .bk-select-option, .bk-option, [data-option], li, option"));
1718
- for (const node of nodes) {
1719
- if (!isOptionCandidateVisible(node)) continue;
1720
- if ((node.textContent?.trim().toLowerCase() ?? "") === target) return node;
1721
- }
1722
- for (const node of nodes) {
1723
- if (!isOptionCandidateVisible(node)) continue;
1724
- if ((node.textContent?.trim().toLowerCase() ?? "").includes(target)) return node;
1725
- }
1986
+ const selectors = [
1987
+ "[role=\"option\"]",
1988
+ "[role=\"listbox\"] li",
1989
+ ".el-select-dropdown__item",
1990
+ ".el-option",
1991
+ ".ant-select-item-option",
1992
+ ".el-cascader-node",
1993
+ ".el-dropdown-menu__item",
1994
+ "[class*=\"option\"]",
1995
+ "li[data-value]",
1996
+ "option"
1997
+ ].join(", ");
1998
+ const visible = Array.from(document.querySelectorAll(selectors)).filter((n) => n instanceof HTMLElement && isElementVisible(n));
1999
+ for (const n of visible) if (n.textContent?.trim().toLowerCase() === target) return n;
2000
+ for (const n of visible) if (n.textContent?.trim().toLowerCase().includes(target)) return n;
1726
2001
  return null;
1727
2002
  }
2003
/**
 * Polls (every 50 ms) until a dropdown/listbox popup is visible or the time
 * budget runs out.
 *
 * Every element matching the popup selector is checked, not just the first
 * one, so a hidden popup earlier in the document does not mask a visible one.
 *
 * @param {number} [maxWait=500] - Maximum time to wait, in milliseconds.
 * @returns {Promise<boolean>} `true` as soon as a visible popup is found,
 *   `false` if the wait timed out.
 */
async function waitForDropdownPopup(maxWait = 500) {
  const popupSelector = "[role=\"listbox\"], .el-select-dropdown, .el-popper, .ant-select-dropdown, [class*=\"dropdown\"]";
  const start = Date.now();
  while (Date.now() - start < maxWait) {
    const popups = Array.from(document.querySelectorAll(popupSelector));
    if (popups.some((popup) => isElementVisible(popup))) return true;
    await sleep(50);
  }
  return false;
}
1728
2011
  function createDomTool() {
1729
2012
  return {
1730
2013
  name: "dom",
@@ -1733,21 +2016,27 @@ function createDomTool() {
1733
2016
  "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
1734
2017
  "Input/Select rule: before each fill/type/select_option, click or focus the same target immediately in the same round.",
1735
2018
  "For multiple fields, use alternating pairs in one batch: focus/click A -> fill/type A -> focus/click B -> fill/type B.",
1736
- "Do not send focus-only batches for editable fields.",
1737
- "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
2019
+ "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector.",
2020
+ "press supports combo keys like 'Control+a', 'Shift+Enter'.",
2021
+ "check/uncheck is done via click — state change is verified after action.",
2022
+ "Ordinal/index rule: treat visual order as 1-based when the instruction says 'the Nth item' (e.g. 4th star = 4th visible icon from left to right), and avoid off-by-one mistakes.",
2023
+ "Disambiguation rule: distinguish descriptive text/labels from actionable options. Do not click nearby label/help text; click the actual interactive option/control item (icon/button/option) that changes state.",
2024
+ "Unknown/complex components: if a container element (e.g. role=slider, rating, custom widget) has multiple child icons/items in the snapshot but you don't know how to operate it directly, try clicking the appropriate child element instead. For example, a rating component with 5 star icon children — click the 4th icon child to set 4 stars. A slider with a runway — clicking the runway at the right position may work. Always prefer interacting with visible children when the parent container doesn't respond to fill/click as expected.",
2025
+ "fill supports role=slider elements: use fill with a numeric value on a role=slider container (rating/slider) to set its value programmatically."
1738
2026
  ].join(" "),
1739
2027
  schema: Type.Object({
1740
- action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class. For fill/type/select_option, perform click/focus on same target immediately before it." }),
2028
+ action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class." }),
1741
2029
  selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
1742
- value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions. For fill/type, run after click/focus on same target in the same round." })),
1743
- key: Type.Optional(Type.String({ description: "Key name for press action (e.g. Enter, Escape, Tab, ArrowDown, ArrowUp, Backspace, Delete, Space)" })),
1744
- label: Type.Optional(Type.String({ description: "Label text for select_option action (fallback when value is not provided). Run select_option after click/focus on same target in the same round." })),
2030
+ value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions." })),
2031
+ key: Type.Optional(Type.String({ description: "Key for press action. Supports combo: 'Enter', 'Control+a', 'Shift+Enter', 'Meta+c'" })),
2032
+ label: Type.Optional(Type.String({ description: "Label text for select_option action." })),
1745
2033
  index: Type.Optional(Type.Number({ description: "0-based option index for select_option action" })),
1746
- attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr actions" })),
2034
+ attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr" })),
1747
2035
  className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
1748
- waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
1749
- waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." })),
1750
- force: Type.Optional(Type.Boolean({ description: "Skip actionability checks for interaction actions (default false)." }))
2036
+ clickCount: Type.Optional(Type.Number({ description: "Click count (default 1). 2 = double-click, 3 = triple-click." })),
2037
+ waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 2000)." })),
2038
+ waitSeconds: Type.Optional(Type.Number({ description: "Wait timeout in seconds (fallback for waitMs)." })),
2039
+ force: Type.Optional(Type.Boolean({ description: "Skip actionability checks (default false)." }))
1751
2040
  }),
1752
2041
  execute: async (params) => {
1753
2042
  const action = params.action;
@@ -1779,184 +2068,149 @@ function createDomTool() {
1779
2068
  };
1780
2069
  el = found;
1781
2070
  } else {
1782
- const elOrError = queryElement(selector);
1783
- if (typeof elOrError === "string") return {
1784
- content: elOrError,
2071
+ const r = queryElement(selector);
2072
+ if (typeof r === "string") return {
2073
+ content: r,
1785
2074
  details: {
1786
2075
  error: true,
1787
- code: elOrError.startsWith("未找到") ? "ELEMENT_NOT_FOUND" : "INVALID_SELECTOR",
2076
+ code: r.startsWith("未找到") ? "ELEMENT_NOT_FOUND" : "INVALID_SELECTOR",
1788
2077
  action,
1789
2078
  selector,
1790
2079
  waitMs
1791
2080
  }
1792
2081
  };
1793
- el = elOrError;
1794
- }
1795
- if (action === "check" || action === "uncheck") {
1796
- const resolvedCheckable = resolveCheckableTarget(el);
1797
- if (resolvedCheckable) el = resolvedCheckable;
2082
+ el = r;
1798
2083
  }
2084
+ if (action === "check" || action === "uncheck") el = resolveCheckableTarget(el);
2085
+ const actionabilityTarget = action === "click" || action === "check" || action === "uncheck" ? resolvePointerActionTarget(resolveFormItemControlTarget(el)) : el;
1799
2086
  try {
1800
- if (!force) {
1801
- const checkResult = ensureActionable(el, action, selector);
1802
- if (checkResult) return checkResult;
1803
- }
2087
+ const checkResult = ensureActionable(actionabilityTarget, action, selector, force);
2088
+ if (checkResult) return checkResult;
1804
2089
  switch (action) {
1805
- case "click":
1806
- if (el instanceof HTMLOptionElement) {
1807
- const parent = el.parentElement;
2090
+ case "click": {
2091
+ const target = resolvePointerActionTarget(resolveFormItemControlTarget(retarget(el, force ? "none" : "button-link")));
2092
+ const clickCount = typeof params.clickCount === "number" ? params.clickCount : 1;
2093
+ if (target instanceof HTMLOptionElement) {
2094
+ const parent = target.parentElement;
1808
2095
  if (parent instanceof HTMLSelectElement) {
1809
2096
  parent.focus();
1810
- parent.value = el.value;
2097
+ parent.value = target.value;
1811
2098
  dispatchInputEvents(parent);
1812
- return { content: `已选择 ${describeElement(parent)} 的选项 "${el.value}"` };
2099
+ return { content: `已选择 ${describeElement(parent)} 的选项 "${target.value}"` };
1813
2100
  }
1814
2101
  }
1815
- if (el instanceof HTMLElement) {
1816
- el.focus();
1817
- el.dispatchEvent(new PointerEvent("pointerdown", {
1818
- bubbles: true,
1819
- cancelable: true
1820
- }));
1821
- el.dispatchEvent(new MouseEvent("mousedown", {
1822
- bubbles: true,
1823
- cancelable: true
1824
- }));
1825
- el.dispatchEvent(new PointerEvent("pointerup", {
1826
- bubbles: true,
1827
- cancelable: true
1828
- }));
1829
- el.dispatchEvent(new MouseEvent("mouseup", {
1830
- bubbles: true,
1831
- cancelable: true
1832
- }));
1833
- el.click();
1834
- } else el.dispatchEvent(new MouseEvent("click", { bubbles: true }));
1835
- return { content: `已点击 ${describeElement(el)}` };
1836
- case "focus":
1837
- if (el instanceof HTMLElement) el.focus();
1838
- else el.dispatchEvent(new FocusEvent("focus", { bubbles: true }));
1839
- return { content: `已聚焦 ${describeElement(el)}` };
1840
- case "hover":
1841
- el.dispatchEvent(new MouseEvent("mouseenter", {
1842
- bubbles: false,
1843
- cancelable: true
1844
- }));
1845
- el.dispatchEvent(new MouseEvent("mouseover", {
1846
- bubbles: true,
1847
- cancelable: true
1848
- }));
1849
- el.dispatchEvent(new MouseEvent("mousemove", {
1850
- bubbles: true,
1851
- cancelable: true
1852
- }));
1853
- return { content: `已悬停 ${describeElement(el)}` };
1854
- case "press": {
1855
- const key = params.key || params.value;
1856
- if (!key) return { content: "缺少 key 参数(如 Enter, Escape, Tab)" };
1857
- if (el instanceof HTMLElement) el.focus();
1858
- const eventInit = {
1859
- key,
1860
- code: resolveKeyboardCode(key),
1861
- bubbles: true,
1862
- cancelable: true
1863
- };
1864
- const keydownAllowed = el.dispatchEvent(new KeyboardEvent("keydown", eventInit));
1865
- el.dispatchEvent(new KeyboardEvent("keypress", eventInit));
1866
- el.dispatchEvent(new KeyboardEvent("keyup", eventInit));
1867
- if (keydownAllowed && key === "Enter") {
1868
- if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) (el.form ?? el.closest("form"))?.dispatchEvent(new Event("submit", {
1869
- bubbles: true,
1870
- cancelable: true
1871
- }));
1872
- }
1873
- return { content: `已在 ${describeElement(el)} 上按下 ${key}` };
2102
+ if (target instanceof HTMLElement) {
2103
+ scrollIntoViewIfNeeded(target);
2104
+ if (!force) await checkElementStable(target, 500);
2105
+ if (!force) {
2106
+ if (checkHitTarget(target)) {
2107
+ scrollIntoViewIfNeeded(target, 1);
2108
+ await sleep(100);
2109
+ }
2110
+ }
2111
+ dispatchClickEvents(target, clickCount);
2112
+ } else target.dispatchEvent(new MouseEvent("click", { bubbles: true }));
2113
+ return { content: `已点击 ${describeElement(target)}` };
1874
2114
  }
1875
2115
  case "fill": {
1876
2116
  const value = params.value;
1877
2117
  if (value === void 0) return { content: "缺少 value 参数" };
1878
- if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
1879
- if (el instanceof HTMLInputElement) {
1880
- if (new Set([
1881
- "checkbox",
1882
- "radio",
1883
- "file",
1884
- "button",
1885
- "submit",
1886
- "reset"
1887
- ]).has(el.type)) return {
1888
- content: `"${selector}" 为 input[type=${el.type}],不支持 fill;请使用 click/press/select_option 等动作。`,
2118
+ const target = retarget(el, "follow-label");
2119
+ if (target instanceof HTMLInputElement) {
2120
+ const type = target.type.toLowerCase();
2121
+ if (INPUT_BLOCKED_TYPES.has(type)) return {
2122
+ content: `"${selector}" 为 input[type=${type}],不支持 fill;请使用 click/check 等动作。`,
2123
+ details: {
2124
+ error: true,
2125
+ code: "UNSUPPORTED_FILL_TARGET",
2126
+ action,
2127
+ selector
2128
+ }
2129
+ };
2130
+ if (INPUT_SET_VALUE_TYPES.has(type)) {
2131
+ const finalVal = type === "color" ? value.toLowerCase().trim() : value.trim();
2132
+ target.focus();
2133
+ target.value = finalVal;
2134
+ if (target.value !== finalVal) return {
2135
+ content: `"${selector}" 填写格式不匹配(type=${type})`,
1889
2136
  details: {
1890
2137
  error: true,
1891
- code: "UNSUPPORTED_FILL_TARGET",
2138
+ code: "MALFORMED_VALUE",
1892
2139
  action,
1893
2140
  selector
1894
2141
  }
1895
2142
  };
2143
+ dispatchInputEvents(target);
2144
+ return { content: `已填写 ${describeElement(target)}: "${finalVal}"` };
1896
2145
  }
1897
- el.focus();
1898
- setNativeEditableValue(el, value);
1899
- dispatchInputEvents(el);
1900
- const actualValue = getEditableValue(el);
1901
- if (actualValue !== value) return {
1902
- content: `"${selector}" 填写后值不一致:期望 "${value}",实际 "${actualValue}"`,
2146
+ if (type === "number" && isNaN(Number(value.trim()))) return {
2147
+ content: `"${selector}" 为 input[type=number],无法填写非数字 "${value}"`,
1903
2148
  details: {
1904
2149
  error: true,
1905
- code: "FILL_NOT_APPLIED",
2150
+ code: "INVALID_NUMBER",
1906
2151
  action,
1907
- selector,
1908
- expected: value,
1909
- actual: actualValue
2152
+ selector
1910
2153
  }
1911
2154
  };
1912
- } else if (el instanceof HTMLSelectElement) {
1913
- el.focus();
1914
- let matched = false;
1915
- for (const option of Array.from(el.options)) if (option.value === value) {
1916
- el.value = option.value;
1917
- matched = true;
1918
- break;
1919
- }
1920
- if (!matched) {
1921
- const normalized = value.trim().toLowerCase();
1922
- for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
1923
- el.value = option.value;
1924
- matched = true;
1925
- break;
1926
- }
1927
- }
1928
- if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
1929
- dispatchInputEvents(el);
1930
- const actualValue = getEditableValue(el);
1931
- if (actualValue !== el.value) return {
1932
- content: `"${selector}" 下拉框状态异常,未确认写入`,
2155
+ scrollIntoViewIfNeeded(target);
2156
+ target.focus();
2157
+ selectText(target);
2158
+ setNativeValue(target, value);
2159
+ dispatchInputEvents(target);
2160
+ if (target.value !== value) return {
2161
+ content: `"${selector}" 填写后值不一致:期望 "${value}",实际 "${target.value}"`,
1933
2162
  details: {
1934
2163
  error: true,
1935
2164
  code: "FILL_NOT_APPLIED",
1936
2165
  action,
1937
- selector,
1938
- expected: value,
1939
- actual: actualValue
2166
+ selector
1940
2167
  }
1941
2168
  };
1942
- } else if (el instanceof HTMLElement && el.isContentEditable) {
1943
- el.focus();
1944
- el.textContent = value;
1945
- el.dispatchEvent(new Event("input", { bubbles: true }));
1946
- } else return { content: `"${selector}" 不是可编辑元素` };
1947
- return { content: `已填写 ${describeElement(el)}: "${value}"` };
2169
+ return { content: `已填写 ${describeElement(target)}: "${value}"` };
2170
+ }
2171
+ if (target instanceof HTMLTextAreaElement) {
2172
+ scrollIntoViewIfNeeded(target);
2173
+ target.focus();
2174
+ selectText(target);
2175
+ setNativeValue(target, value);
2176
+ dispatchInputEvents(target);
2177
+ return { content: `已填写 ${describeElement(target)}: "${value}"` };
2178
+ }
2179
+ if (target instanceof HTMLSelectElement) {
2180
+ target.focus();
2181
+ const options = Array.from(target.options);
2182
+ let matched = options.find((o) => o.value === value);
2183
+ if (!matched) {
2184
+ const n = value.trim().toLowerCase();
2185
+ matched = options.find((o) => o.text.trim().toLowerCase() === n);
2186
+ }
2187
+ if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
2188
+ target.value = matched.value;
2189
+ dispatchInputEvents(target);
2190
+ return { content: `已填写 ${describeElement(target)}: "${value}"` };
2191
+ }
2192
+ if (target instanceof HTMLElement && target.isContentEditable) {
2193
+ target.focus();
2194
+ selectText(target);
2195
+ if (value) document.execCommand("insertText", false, value);
2196
+ else document.execCommand("delete", false, void 0);
2197
+ return { content: `已填写 ${describeElement(target)}: "${value}"` };
2198
+ }
2199
+ return { content: `"${selector}" 不是可编辑元素` };
1948
2200
  }
1949
2201
  case "select_option": {
1950
2202
  const value = params.value;
1951
2203
  const label = params.label;
1952
2204
  const index = typeof params.index === "number" ? Math.floor(params.index) : void 0;
1953
2205
  if (value === void 0 && label === void 0 && index === void 0) return { content: "缺少可选参数:value 或 label 或 index" };
1954
- if (!(el instanceof HTMLSelectElement)) {
1955
- if (!(el instanceof HTMLElement)) return { content: `"${selector}" 不是下拉框元素` };
1956
- el.focus();
1957
- el.click();
2206
+ const target = retarget(el, "follow-label");
2207
+ if (!(target instanceof HTMLSelectElement)) {
2208
+ if (!(target instanceof HTMLElement)) return { content: `"${selector}" 不是下拉框元素` };
2209
+ scrollIntoViewIfNeeded(target);
1958
2210
  const wanted = (label ?? value ?? "").trim();
1959
2211
  if (!wanted) return { content: `"${selector}" 为自定义下拉时,需提供 value 或 label` };
2212
+ dispatchClickEvents(target);
2213
+ await waitForDropdownPopup(800);
1960
2214
  const option = findVisibleOptionByText(wanted);
1961
2215
  if (!option) return {
1962
2216
  content: `未找到与 "${wanted}" 匹配的可见下拉选项(自定义下拉)`,
@@ -1968,84 +2222,156 @@ function createDomTool() {
1968
2222
  wanted
1969
2223
  }
1970
2224
  };
1971
- option.click();
2225
+ dispatchClickEvents(option);
1972
2226
  return { content: `已在自定义下拉中选择 "${wanted}"` };
1973
2227
  }
1974
- el.focus();
1975
- const options = Array.from(el.options);
1976
- let selectedOption;
1977
- if (value !== void 0) selectedOption = options.find((option) => option.value === value);
1978
- if (!selectedOption && label !== void 0) {
1979
- const normalizedLabel = label.trim().toLowerCase();
1980
- selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedLabel);
2228
+ target.focus();
2229
+ const options = Array.from(target.options);
2230
+ let selected;
2231
+ if (value !== void 0) selected = options.find((o) => o.value === value);
2232
+ if (!selected && label !== void 0) {
2233
+ const nl = label.trim().toLowerCase();
2234
+ selected = options.find((o) => o.text.trim().toLowerCase() === nl);
1981
2235
  }
1982
- if (!selectedOption && value !== void 0) {
1983
- const normalizedValueAsLabel = value.trim().toLowerCase();
1984
- selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedValueAsLabel);
2236
+ if (!selected && value !== void 0) {
2237
+ const nv = value.trim().toLowerCase();
2238
+ selected = options.find((o) => o.text.trim().toLowerCase() === nv);
1985
2239
  }
1986
- if (!selectedOption && index !== void 0) {
2240
+ if (!selected && index !== void 0) {
1987
2241
  if (index < 0 || index >= options.length) return { content: `"${selector}" 下拉框不存在 index=${index} 的选项` };
1988
- selectedOption = options[index];
2242
+ selected = options[index];
1989
2243
  }
1990
- if (!selectedOption) return { content: `"${selector}" 下拉框中不存在选项 "${value ?? label ?? `index=${index}`}"` };
1991
- if (selectedOption.disabled) return { content: `"${selector}" 目标选项已禁用:${selectedOption.value}` };
1992
- if (!el.multiple) for (const option of options) option.selected = false;
1993
- selectedOption.selected = true;
1994
- el.value = selectedOption.value;
1995
- dispatchInputEvents(el);
1996
- return { content: `已选择 ${describeElement(el)}: value="${selectedOption.value}", label="${selectedOption.text.trim()}"` };
2244
+ if (!selected) return { content: `"${selector}" 下拉框中不存在选项 "${value ?? label ?? `index=${index}`}"` };
2245
+ if (selected.disabled) return {
2246
+ content: `"${selector}" 目标选项已禁用:${selected.value}`,
2247
+ details: {
2248
+ error: true,
2249
+ code: "OPTION_DISABLED",
2250
+ action,
2251
+ selector
2252
+ }
2253
+ };
2254
+ if (!target.multiple) for (const o of options) o.selected = false;
2255
+ selected.selected = true;
2256
+ target.value = selected.value;
2257
+ dispatchInputEvents(target);
2258
+ return { content: `已选择 ${describeElement(target)}: value="${selected.value}", label="${selected.text.trim()}"` };
1997
2259
  }
1998
- case "clear":
1999
- if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement) {
2000
- el.focus();
2001
- setNativeEditableValue(el, "");
2002
- dispatchInputEvents(el);
2003
- return { content: `已清空 ${describeElement(el)}` };
2260
+ case "clear": {
2261
+ const target = retarget(el, "follow-label");
2262
+ if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
2263
+ scrollIntoViewIfNeeded(target);
2264
+ target.focus();
2265
+ selectText(target);
2266
+ setNativeValue(target, "");
2267
+ dispatchInputEvents(target);
2268
+ return { content: `已清空 ${describeElement(target)}` };
2004
2269
  }
2005
- if (el instanceof HTMLElement && el.isContentEditable) {
2006
- el.focus();
2007
- el.textContent = "";
2008
- el.dispatchEvent(new Event("input", { bubbles: true }));
2009
- return { content: `已清空 ${describeElement(el)}` };
2270
+ if (target instanceof HTMLSelectElement) {
2271
+ target.focus();
2272
+ target.value = "";
2273
+ dispatchInputEvents(target);
2274
+ return { content: `已清空 ${describeElement(target)}` };
2275
+ }
2276
+ if (target instanceof HTMLElement && target.isContentEditable) {
2277
+ target.focus();
2278
+ selectText(target);
2279
+ document.execCommand("delete", false, void 0);
2280
+ return { content: `已清空 ${describeElement(target)}` };
2010
2281
  }
2011
2282
  return { content: `"${selector}" 不是可清空元素` };
2283
+ }
2012
2284
  case "check":
2013
- if (!(el instanceof HTMLInputElement) || el.type !== "checkbox" && el.type !== "radio") return { content: `"${selector}" 不是 checkbox/radio` };
2014
- el.focus();
2015
- if (!el.checked) {
2016
- el.checked = true;
2017
- dispatchInputEvents(el);
2018
- }
2019
- return { content: `已勾选 ${describeElement(el)}` };
2020
- case "uncheck":
2021
- if (!(el instanceof HTMLInputElement) || el.type !== "checkbox") return { content: `"${selector}" 不是 checkbox` };
2022
- el.focus();
2023
- if (el.checked) {
2024
- el.checked = false;
2285
+ case "uncheck": {
2286
+ const wantChecked = action === "check";
2287
+ const current = getChecked(el);
2288
+ if (current === "error") return {
2289
+ content: `"${selector}" 不是 checkbox/radio/[role=checkbox]/[role=radio],无法 ${action}`,
2290
+ details: {
2291
+ error: true,
2292
+ code: "NOT_CHECKABLE",
2293
+ action,
2294
+ selector
2295
+ }
2296
+ };
2297
+ if (current === wantChecked) return { content: `${describeElement(el)} 已经是${wantChecked ? "选中" : "未选中"}状态` };
2298
+ if (!wantChecked && el instanceof HTMLInputElement && el.type === "radio") return {
2299
+ content: `无法取消 radio 按钮的选中状态`,
2300
+ details: {
2301
+ error: true,
2302
+ code: "CANNOT_UNCHECK_RADIO",
2303
+ action,
2304
+ selector
2305
+ }
2306
+ };
2307
+ const pointerTarget = resolvePointerActionTarget(el);
2308
+ scrollIntoViewIfNeeded(pointerTarget);
2309
+ if (pointerTarget instanceof HTMLElement) dispatchClickEvents(pointerTarget);
2310
+ else pointerTarget.dispatchEvent(new MouseEvent("click", { bubbles: true }));
2311
+ await sleep(50);
2312
+ if (getChecked(el) !== wantChecked && el instanceof HTMLInputElement) {
2313
+ el.checked = wantChecked;
2025
2314
  dispatchInputEvents(el);
2026
2315
  }
2027
- return { content: `已取消勾选 ${describeElement(el)}` };
2316
+ return { content: `已${wantChecked ? "勾选" : "取消勾选"} ${describeElement(el)}` };
2317
+ }
2028
2318
  case "type": {
2029
2319
  const value = params.value;
2030
2320
  if (value === void 0) return { content: "缺少 value 参数" };
2031
- if (el instanceof HTMLElement) el.focus();
2321
+ const target = retarget(el, "follow-label");
2322
+ scrollIntoViewIfNeeded(target);
2323
+ if (target instanceof HTMLElement) target.focus();
2032
2324
  for (const char of value) {
2033
- el.dispatchEvent(new KeyboardEvent("keydown", {
2034
- key: char,
2035
- bubbles: true
2036
- }));
2037
- el.dispatchEvent(new KeyboardEvent("keypress", {
2038
- key: char,
2039
- bubbles: true
2040
- }));
2041
- if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) el.value += char;
2042
- el.dispatchEvent(new Event("input", { bubbles: true }));
2043
- el.dispatchEvent(new KeyboardEvent("keyup", {
2325
+ const init = {
2044
2326
  key: char,
2045
- bubbles: true
2327
+ code: resolveKeyCode(char),
2328
+ bubbles: true,
2329
+ cancelable: true
2330
+ };
2331
+ target.dispatchEvent(new KeyboardEvent("keydown", init));
2332
+ target.dispatchEvent(new KeyboardEvent("keypress", init));
2333
+ if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
2334
+ const proto = target instanceof HTMLInputElement ? HTMLInputElement.prototype : HTMLTextAreaElement.prototype;
2335
+ const nativeSet = Object.getOwnPropertyDescriptor(proto, "value")?.set;
2336
+ if (nativeSet) nativeSet.call(target, target.value + char);
2337
+ else target.value += char;
2338
+ } else if (target instanceof HTMLElement && target.isContentEditable) document.execCommand("insertText", false, char);
2339
+ target.dispatchEvent(new Event("input", {
2340
+ bubbles: true,
2341
+ composed: true
2046
2342
  }));
2343
+ target.dispatchEvent(new KeyboardEvent("keyup", init));
2047
2344
  }
2048
- return { content: `已逐字输入到 ${describeElement(el)}: "${value}"` };
2345
+ if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) target.dispatchEvent(new Event("change", { bubbles: true }));
2346
+ return { content: `已逐字输入到 ${describeElement(target)}: "${value}"` };
2347
+ }
2348
+ case "focus": {
2349
+ const target = retarget(el, "follow-label");
2350
+ if (target instanceof HTMLElement || target instanceof SVGElement) {
2351
+ target.focus();
2352
+ target.focus();
2353
+ }
2354
+ return { content: `已聚焦 ${describeElement(target)}` };
2355
+ }
2356
+ case "hover": {
2357
+ const target = retarget(el, "none");
2358
+ scrollIntoViewIfNeeded(target);
2359
+ if (!force) await checkElementStable(target, 500);
2360
+ if (target instanceof HTMLElement) dispatchHoverEvents(target);
2361
+ return { content: `已悬停 ${describeElement(target)}` };
2362
+ }
2363
+ case "press": {
2364
+ const key = params.key || params.value;
2365
+ if (!key) return { content: "缺少 key 参数(如 Enter, Escape, Tab, Control+a)" };
2366
+ const target = retarget(el, "none");
2367
+ scrollIntoViewIfNeeded(target);
2368
+ if (target instanceof HTMLElement) target.focus();
2369
+ executePress(target, key);
2370
+ if (splitKeyCombo(key).pop() === "Enter") (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement ? target.form ?? target.closest("form") : target.closest("form"))?.dispatchEvent(new Event("submit", {
2371
+ bubbles: true,
2372
+ cancelable: true
2373
+ }));
2374
+ return { content: `已在 ${describeElement(target)} 上按下 ${key}` };
2049
2375
  }
2050
2376
  case "get_text": {
2051
2377
  const text = el.textContent?.trim() ?? "";
@@ -2054,8 +2380,21 @@ function createDomTool() {
2054
2380
  case "get_attr": {
2055
2381
  const attribute = params.attribute;
2056
2382
  if (!attribute) return { content: "缺少 attribute 参数" };
2057
- const attrValue = el.getAttribute(attribute);
2058
- return { content: `${describeElement(el)} ${attribute} = ${attrValue ?? "(不存在)"}` };
2383
+ const attrName = attribute.toLowerCase();
2384
+ if (attrName === "checked") {
2385
+ if (el instanceof HTMLInputElement) return { content: `${describeElement(el)} 的 checked = ${String(el.checked)}` };
2386
+ return { content: `${describeElement(el)} 的 checked = ${el.getAttribute("aria-checked") ?? "(不存在)"}` };
2387
+ }
2388
+ if (attrName === "selected") {
2389
+ if (el instanceof HTMLOptionElement) return { content: `${describeElement(el)} 的 selected = ${String(el.selected)}` };
2390
+ return { content: `${describeElement(el)} 的 selected = ${el.getAttribute("aria-selected") ?? "(不存在)"}` };
2391
+ }
2392
+ if (attrName === "disabled") {
2393
+ if (el instanceof HTMLButtonElement || el instanceof HTMLInputElement || el instanceof HTMLSelectElement || el instanceof HTMLTextAreaElement) return { content: `${describeElement(el)} 的 disabled = ${String(el.disabled)}` };
2394
+ }
2395
+ if (attrName === "readonly" && (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement)) return { content: `${describeElement(el)} 的 readonly = ${String(el.readOnly)}` };
2396
+ if (attrName === "value" && (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement)) return { content: `${describeElement(el)} 的 value = ${el.value || "(空)"}` };
2397
+ return { content: `${describeElement(el)} 的 ${attribute} = ${el.getAttribute(attribute) ?? "(不存在)"}` };
2059
2398
  }
2060
2399
  case "set_attr": {
2061
2400
  const attribute = params.attribute;
@@ -2458,26 +2797,42 @@ function createPageInfoTool() {
2458
2797
  //#endregion
2459
2798
  //#region src/web/tools/navigate-tool.ts
2460
2799
  /**
2461
- * Navigate Tool — 基于 Web API 的页面导航工具。
2462
- *
2463
- * 替代 Playwright 的 goto/goBack/goForward/reload。
2464
- * 运行环境:浏览器 Content Script。
2800
+ * Navigate Tool — 页面导航工具(增强版)。
2465
2801
  *
2466
2802
  * 支持 5 种动作:
2467
2803
  * goto — 跳转到指定 URL
2468
2804
  * back — 浏览器后退
2469
2805
  * forward — 浏览器前进
2470
2806
  * reload — 刷新当前页面
2471
- * scroll — 滚动页面到指定位置或元素
2807
+ * scroll — 滚动页面到指定位置或元素(支持 RefStore hash ID + 多策略对齐)
2472
2808
  */
2809
+ /** 解析 selector(支持 RefStore hash ID 和 CSS 选择器) */
2810
+ function resolveElement(selector) {
2811
+ if (selector.startsWith("#")) {
2812
+ const store = getActiveRefStore();
2813
+ if (store) {
2814
+ const id = selector.slice(1);
2815
+ if (store.has(id)) return store.get(id) ?? null;
2816
+ }
2817
+ }
2818
+ try {
2819
+ return document.querySelector(selector);
2820
+ } catch {
2821
+ return null;
2822
+ }
2823
+ }
2473
2824
  function createNavigateTool() {
2474
2825
  return {
2475
2826
  name: "navigate",
2476
- description: ["Navigate the current page.", "Actions: goto (open URL), back, forward, reload, scroll (to position or element)."].join(" "),
2827
+ description: [
2828
+ "Navigate the current page.",
2829
+ "Actions: goto (open URL), back, forward, reload, scroll (to position or element).",
2830
+ "scroll supports hash ID from snapshot (e.g. #r0) or CSS selector."
2831
+ ].join(" "),
2477
2832
  schema: Type.Object({
2478
2833
  action: Type.String({ description: "Navigation action: goto | back | forward | reload | scroll" }),
2479
2834
  url: Type.Optional(Type.String({ description: "URL for goto action" })),
2480
- selector: Type.Optional(Type.String({ description: "CSS selector for scroll action (scrolls element into view)" })),
2835
+ selector: Type.Optional(Type.String({ description: "Element ref ID from snapshot (e.g. #r0) or CSS selector for scroll action" })),
2481
2836
  x: Type.Optional(Type.Number({ description: "Horizontal scroll position (pixels)" })),
2482
2837
  y: Type.Optional(Type.Number({ description: "Vertical scroll position (pixels)" }))
2483
2838
  }),
@@ -2503,9 +2858,10 @@ function createNavigateTool() {
2503
2858
  case "scroll": {
2504
2859
  const selector = params.selector;
2505
2860
  if (selector) {
2506
- const el = document.querySelector(selector);
2861
+ const el = resolveElement(selector);
2507
2862
  if (!el) return { content: `未找到元素 "${selector}"` };
2508
- el.scrollIntoView({
2863
+ if ("scrollIntoViewIfNeeded" in el) el.scrollIntoViewIfNeeded(true);
2864
+ else el.scrollIntoView({
2509
2865
  behavior: "smooth",
2510
2866
  block: "center"
2511
2867
  });
@@ -2538,36 +2894,88 @@ function createNavigateTool() {
2538
2894
  //#endregion
2539
2895
  //#region src/web/tools/wait-tool.ts
2540
2896
  /**
2541
- * Wait Tool 基于 MutationObserver 的元素等待工具。
2897
+ * Wait Tool 等待工具 / Wait utility for DOM conditions.
2542
2898
  *
2543
- * 替代 Playwright waitForSelector/waitForNavigation。
2544
- * 运行环境:浏览器 Content Script。
2899
+ * 支持动作 / Supported actions:
2900
+ * - wait_for_selector: 等待选择器达到状态 / wait selector state
2901
+ * - wait_for_hidden: 等待元素隐藏或移除 / wait element hidden or detached
2902
+ * - wait_for_text: 等待页面出现文本 / wait text appears in page
2903
+ * - wait_for_stable: 等待 DOM 进入静默窗口 / wait DOM quiet window
2545
2904
  *
2546
- * 支持 4 种动作:
2547
- * wait_for_selector — 等待匹配选择器的元素出现
2548
- * wait_for_hidden — 等待元素消失或隐藏
2549
- * wait_for_text — 等待页面中出现指定文本
2550
- * wait_for_stable — 等待 DOM 在一段时间内无变化
2905
+ * 说明 / Notes:
2906
+ * - hash selector(如 #abc123)优先通过 RefStore 解析。
2907
+ * - 可见性语义与 dom-tool 保持一致(参考 Playwright 风格)。
2551
2908
  */
2552
- /** 默认超时时间(毫秒) */
2553
2909
  const DEFAULT_TIMEOUT = 1e4;
2910
+ const POLL_INTERVAL_MS = 80;
2911
+ const STABLE_TICK_MS = 50;
2912
+ const OBSERVER_OPTIONS = {
2913
+ childList: true,
2914
+ subtree: true,
2915
+ attributes: true,
2916
+ characterData: true
2917
+ };
2918
+ const TEXT_OBSERVER_OPTIONS = {
2919
+ childList: true,
2920
+ subtree: true,
2921
+ characterData: true
2922
+ };
2554
2923
  /**
2555
- * Playwright 风格可见性判定(近似)。
2924
+ * 可见性判定 / Visibility check.
2925
+ *
2926
+ * 与 dom-tool 保持一致,处理 display:contents、visibility、opacity、零尺寸等场景。
2556
2927
  */
2557
2928
  function isVisible(el) {
2558
2929
  if (!(el instanceof HTMLElement || el instanceof SVGElement)) return false;
2559
2930
  if (!el.isConnected) return false;
2560
2931
  const style = window.getComputedStyle(el);
2561
- if (style.display === "none" || style.visibility === "hidden") return false;
2932
+ if (style.display === "contents") {
2933
+ for (let child = el.firstChild; child; child = child.nextSibling) {
2934
+ if (child.nodeType === Node.ELEMENT_NODE && isVisible(child)) return true;
2935
+ if (child.nodeType === Node.TEXT_NODE) {
2936
+ const range = document.createRange();
2937
+ range.selectNodeContents(child);
2938
+ const rects = range.getClientRects();
2939
+ for (let i = 0; i < rects.length; i++) if (rects[i].width > 0 && rects[i].height > 0) return true;
2940
+ }
2941
+ }
2942
+ return false;
2943
+ }
2944
+ if (style.display === "none") return false;
2945
+ if (typeof el.checkVisibility === "function") {
2946
+ if (!el.checkVisibility()) return false;
2947
+ }
2948
+ if (style.visibility !== "visible") return false;
2562
2949
  if (style.opacity === "0") return false;
2563
2950
  const rect = el.getBoundingClientRect();
2564
2951
  return rect.width > 0 && rect.height > 0;
2565
2952
  }
2566
2953
  /**
2567
- * 读取 selector 当前状态。
2954
+ * 解析选择器 / Resolve selector.
2955
+ *
2956
+ * 先尝试 RefStore hash,再回退到 document.querySelector。
2957
+ */
2958
+ function resolveSelector(selector) {
2959
+ if (selector.startsWith("#")) {
2960
+ const store = getActiveRefStore();
2961
+ if (store) {
2962
+ const id = selector.slice(1);
2963
+ if (store.has(id)) return store.get(id) ?? null;
2964
+ }
2965
+ }
2966
+ try {
2967
+ return document.querySelector(selector);
2968
+ } catch {
2969
+ return null;
2970
+ }
2971
+ }
2972
+ /**
2973
+ * 计算选择器状态 / Evaluate selector state.
2974
+ *
2975
+ * @returns matched 表示是否达到目标状态;element 为当前命中的元素(如果存在)。
2568
2976
  */
2569
2977
  function evaluateSelectorState(selector, state) {
2570
- const el = document.querySelector(selector) ?? void 0;
2978
+ const el = resolveSelector(selector) ?? void 0;
2571
2979
  switch (state) {
2572
2980
  case "attached": return {
2573
2981
  matched: Boolean(el),
@@ -2589,7 +2997,9 @@ function evaluateSelectorState(selector, state) {
2589
2997
  }
2590
2998
  }
2591
2999
  /**
2592
- * 等待 selector 达到指定状态(近似 Playwright state 语义)。
3000
+ * 等待选择器达到指定状态 / Wait selector reaches state.
3001
+ *
3002
+ * 策略:轮询 + MutationObserver 双通道,既保证及时性也降低漏检概率。
2593
3003
  */
2594
3004
  function waitForSelectorState(selector, state, timeoutMs) {
2595
3005
  return new Promise((resolve, reject) => {
@@ -2615,19 +3025,16 @@ function waitForSelectorState(selector, state, timeoutMs) {
2615
3025
  const timer = setTimeout(() => {
2616
3026
  finish(() => reject(/* @__PURE__ */ new Error(`等待 "${selector}" 达到状态 "${state}" 超时 (${timeoutMs}ms)`)));
2617
3027
  }, timeoutMs);
2618
- const interval = setInterval(check, 80);
3028
+ const interval = setInterval(check, POLL_INTERVAL_MS);
2619
3029
  const observer = new MutationObserver(check);
2620
- observer.observe(document.body, {
2621
- childList: true,
2622
- subtree: true,
2623
- attributes: true,
2624
- characterData: true
2625
- });
3030
+ observer.observe(document.body, OBSERVER_OPTIONS);
2626
3031
  check();
2627
3032
  });
2628
3033
  }
2629
3034
  /**
2630
- * 等待页面中出现指定文本。
3035
+ * 等待文本出现 / Wait text appears.
3036
+ *
3037
+ * 先做一次即时检查,再监听 DOM 变化。
2631
3038
  */
2632
3039
  function waitForText(text, timeoutMs) {
2633
3040
  return new Promise((resolve, reject) => {
@@ -2646,15 +3053,13 @@ function waitForText(text, timeoutMs) {
2646
3053
  resolve();
2647
3054
  }
2648
3055
  });
2649
- observer.observe(document.body, {
2650
- childList: true,
2651
- subtree: true,
2652
- characterData: true
2653
- });
3056
+ observer.observe(document.body, TEXT_OBSERVER_OPTIONS);
2654
3057
  });
2655
3058
  }
2656
3059
  /**
2657
- * 等待页面进入稳定状态:在 quietMs 时间窗口内没有 DOM 变化。
3060
+ * 等待 DOM 稳定 / Wait DOM stable.
3061
+ *
3062
+ * 定义:quietMs 窗口内没有任何 MutationObserver 事件。
2658
3063
  */
2659
3064
  function waitForDomStable(timeoutMs, quietMs) {
2660
3065
  return new Promise((resolve, reject) => {
@@ -2669,12 +3074,7 @@ function waitForDomStable(timeoutMs, quietMs) {
2669
3074
  const observer = new MutationObserver(() => {
2670
3075
  lastMutationAt = Date.now();
2671
3076
  });
2672
- observer.observe(document.body, {
2673
- childList: true,
2674
- subtree: true,
2675
- attributes: true,
2676
- characterData: true
2677
- });
3077
+ observer.observe(document.body, OBSERVER_OPTIONS);
2678
3078
  const tick = setInterval(() => {
2679
3079
  const now = Date.now();
2680
3080
  if (now - startedAt > timeoutMs) {
@@ -2682,7 +3082,7 @@ function waitForDomStable(timeoutMs, quietMs) {
2682
3082
  return;
2683
3083
  }
2684
3084
  if (now - lastMutationAt >= quietMs) finish(true);
2685
- }, 50);
3085
+ }, STABLE_TICK_MS);
2686
3086
  });
2687
3087
  }
2688
3088
  function createWaitTool() {
@@ -3035,7 +3435,17 @@ function registerToolHandler(executors) {
3035
3435
  * │ └──────────┘ └────────────┘ └──────────────┘ │
3036
3436
  * └──────────────────────────────────────────────────┘
3037
3437
  */
3038
- var WebAgent = class {
3438
+ var WebAgent = class WebAgent {
3439
+ /** 默认系统提示词 key(兼容旧版 setSystemPrompt(prompt))。 */
3440
+ static DEFAULT_SYSTEM_PROMPT_KEY = "default";
3441
+ /** 默认内置工具名(注册后受保护,不允许删除)。 */
3442
+ static DEFAULT_TOOL_NAMES = [
3443
+ "dom",
3444
+ "navigate",
3445
+ "page_info",
3446
+ "wait",
3447
+ "evaluate"
3448
+ ];
3039
3449
  /** 用户传入的自定义 AI 客户端实例(优先级高于 token/provider) */
3040
3450
  client;
3041
3451
  token;
@@ -3045,7 +3455,10 @@ var WebAgent = class {
3045
3455
  stream;
3046
3456
  dryRun;
3047
3457
  maxRounds;
3048
- customSystemPrompt;
3458
+ /** system prompt 注册表(key -> prompt 文本)。 */
3459
+ systemPromptRegistry = /* @__PURE__ */ new Map();
3460
+ /** 受保护工具集合(默认工具)。 */
3461
+ protectedToolNames = /* @__PURE__ */ new Set();
3049
3462
  /** 多轮对话记忆开关 */
3050
3463
  memory;
3051
3464
  /** 对话历史(memory 开启时自动累积) */
@@ -3067,10 +3480,11 @@ var WebAgent = class {
3067
3480
  this.stream = options.stream ?? true;
3068
3481
  this.dryRun = options.dryRun ?? false;
3069
3482
  this.maxRounds = options.maxRounds ?? 40;
3070
- this.customSystemPrompt = options.systemPrompt;
3071
3483
  this.memory = options.memory ?? false;
3072
3484
  this.autoSnapshot = options.autoSnapshot ?? true;
3073
3485
  this.snapshotOptions = options.snapshotOptions ?? {};
3486
+ if (typeof options.systemPrompt === "string") this.setSystemPrompt(options.systemPrompt);
3487
+ else if (options.systemPrompt && typeof options.systemPrompt === "object") this.setSystemPrompts(options.systemPrompt);
3074
3488
  }
3075
3489
  /** 注册所有内置 Web 工具(dom, navigate, page_info, wait, evaluate) */
3076
3490
  registerTools() {
@@ -3079,11 +3493,41 @@ var WebAgent = class {
3079
3493
  this.registry.register(createPageInfoTool());
3080
3494
  this.registry.register(createWaitTool());
3081
3495
  this.registry.register(createEvaluateTool());
3496
+ for (const name of WebAgent.DEFAULT_TOOL_NAMES) this.protectedToolNames.add(name);
3082
3497
  }
3083
3498
  /** 注册一个自定义工具 */
3084
3499
  registerTool(tool) {
3085
3500
  this.registry.register(tool);
3086
3501
  }
3502
+ /**
3503
+ * 删除一个已注册工具。
3504
+ * - 默认内置工具(registerTools 注册)不允许删除
3505
+ * - 返回 true 表示删除成功,false 表示不存在或受保护
3506
+ */
3507
+ removeTool(name) {
3508
+ if (this.protectedToolNames.has(name)) return false;
3509
+ return this.registry.unregister(name);
3510
+ }
3511
+ /** 检查工具是否已注册。 */
3512
+ hasTool(name) {
3513
+ return this.registry.has(name);
3514
+ }
3515
+ /** 获取当前所有已注册工具名。 */
3516
+ getToolNames() {
3517
+ return this.registry.getDefinitions().map((tool) => tool.name);
3518
+ }
3519
+ /**
3520
+ * 删除所有“非默认”工具。
3521
+ * 返回值为本次被删除的工具名数组。
3522
+ */
3523
+ clearCustomTools() {
3524
+ const removed = [];
3525
+ for (const tool of this.registry.getDefinitions()) {
3526
+ if (this.protectedToolNames.has(tool.name)) continue;
3527
+ if (this.registry.unregister(tool.name)) removed.push(tool.name);
3528
+ }
3529
+ return removed;
3530
+ }
3087
3531
  /** 获取所有已注册的工具定义列表 */
3088
3532
  getTools() {
3089
3533
  return this.registry.getDefinitions();
@@ -3121,9 +3565,37 @@ var WebAgent = class {
3121
3565
  setDryRun(enabled) {
3122
3566
  this.dryRun = enabled;
3123
3567
  }
3124
- /** 设置自定义系统提示词 */
3125
- setSystemPrompt(prompt) {
3126
- this.customSystemPrompt = prompt;
3568
+ setSystemPrompt(keyOrPrompt, maybePrompt) {
3569
+ const key = maybePrompt === void 0 ? WebAgent.DEFAULT_SYSTEM_PROMPT_KEY : keyOrPrompt.trim();
3570
+ const prompt = maybePrompt === void 0 ? keyOrPrompt : maybePrompt;
3571
+ if (!key) throw new Error("system prompt 的 key 不能为空");
3572
+ const value = prompt.trim();
3573
+ if (!value) throw new Error("system prompt 不能为空");
3574
+ this.systemPromptRegistry.set(key, value);
3575
+ }
3576
+ /** 批量注册系统提示词(key -> prompt)。 */
3577
+ setSystemPrompts(prompts) {
3578
+ for (const [key, prompt] of Object.entries(prompts)) this.setSystemPrompt(key, prompt);
3579
+ }
3580
+ /** 注销指定 key 的系统提示词。 */
3581
+ removeSystemPrompt(key) {
3582
+ return this.systemPromptRegistry.delete(key);
3583
+ }
3584
+ /** 只保留指定 key 的系统提示词,其余全部删除。 */
3585
+ keepOnlySystemPrompt(key) {
3586
+ if (!this.systemPromptRegistry.has(key)) return false;
3587
+ const value = this.systemPromptRegistry.get(key);
3588
+ this.systemPromptRegistry.clear();
3589
+ this.systemPromptRegistry.set(key, value);
3590
+ return true;
3591
+ }
3592
+ /** 获取当前已注册的全部系统提示词(浅拷贝)。 */
3593
+ getSystemPrompts() {
3594
+ return Object.fromEntries(this.systemPromptRegistry.entries());
3595
+ }
3596
+ /** 删除全部系统提示词。 */
3597
+ clearSystemPrompts() {
3598
+ this.systemPromptRegistry.clear();
3127
3599
  }
3128
3600
  /** 开启或关闭多轮对话记忆 */
3129
3601
  setMemory(enabled) {
@@ -3165,7 +3637,11 @@ var WebAgent = class {
3165
3637
  */
3166
3638
  async chat(message) {
3167
3639
  const client = this.client ?? this.createBuiltinClient();
3168
- let systemPrompt = this.customSystemPrompt ?? buildSystemPrompt({ tools: this.registry.getDefinitions() });
3640
+ let systemPrompt = buildSystemPrompt({ tools: this.registry.getDefinitions() });
3641
+ if (this.systemPromptRegistry.size > 0) {
3642
+ const extensionText = Array.from(this.systemPromptRegistry.entries()).map(([key, prompt]) => `- [${key}]\n${prompt}`).join("\n\n");
3643
+ systemPrompt += `\n\n## Registered System Prompt Extensions\n${extensionText}`;
3644
+ }
3169
3645
  const refStore = new RefStore(globalThis.location?.href);
3170
3646
  setActiveRefStore(refStore);
3171
3647
  let initialSnapshot;