agentpage 0.0.16 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -73,6 +73,34 @@ function hasToolError(result) {
73
73
  *
74
74
  * 默认关闭 viewportOnly,优先完整性。
75
75
  * viewportOnly defaults to false to prioritize completeness.
76
+ *
77
+ * 步骤(中)/ Steps (EN):
78
+ * 1) 合并调用方 options 与默认值(深度/裁剪/剪枝/节点上限等)。
79
+ * 2) 分发 `page_info.snapshot` 获取当前 DOM 文本快照。
80
+ * 3) 使用 `toContentString` 归一化输出,避免 provider 差异导致结构不一致。
81
+ * 4) 返回稳定字符串给 loop,供后续注入消息与统计。
82
+ *
83
+ * 默认参数意图(中)/ Default parameter rationale (EN):
84
+ * - `maxDepth=8`: 保留足够层级,减少关键控件被截断。
85
+ * - `viewportOnly=false`: 优先完整性,避免误判“元素不存在”。
86
+ * - `pruneLayout=true`: 抑制纯布局噪声,降低 token 压力。
87
+ * - `maxNodes=500` / `maxChildren=30`: 控制体积上限,兼顾可读性。
88
+ * - `maxTextLength=40`: 防止长文本淹没结构信息。
89
+ *
90
+ * 压缩/剪枝是怎么做的(中)/ How compression & pruning works in practice (EN):
91
+ * - `viewportOnly=true` 时:仅保留与视口相交元素(根层容器保留),完全视口外元素跳过。
92
+ * - `pruneLayout=true` 时:无 id/无语义/无交互/无直接文本的布局容器会被“折叠”,
93
+ * 子节点直接提升输出,减少无意义层级。
94
+ * - `maxNodes`:全局节点预算,超限后停止继续遍历并追加 truncation 提示。
95
+ * - `maxChildren`:每个父节点只保留前 N 个子元素,其余用 `... (n children omitted)` 汇总。
96
+ * - `maxTextLength`:节点文本按长度截断,避免长段文案占满上下文。
97
+ * - 交互优先排序:优先输出按钮/输入框/链接等交互元素,再输出普通元素。
98
+ * - 属性压缩:仅保留关键属性(如 id、关键 class、交互属性、布尔状态、val),减少冗余 token。
99
+ *
100
+ * 输入/输出(中)/ I/O contract (EN):
101
+ * - In: `ToolRegistry` + 可选快照参数
102
+ * - Out: 归一化后的快照字符串(始终 string)
103
+ * - Side effects: 无本地状态写入;仅依赖工具调用结果
76
104
  */
77
105
  async function readPageSnapshot(registry, options) {
78
106
  return toContentString((await registry.dispatch("page_info", {
@@ -85,7 +113,14 @@ async function readPageSnapshot(registry, options) {
85
113
  maxTextLength: options?.maxTextLength ?? 40
86
114
  })).content);
87
115
  }
88
- /** 包裹快照(中)/ Wrap snapshot with boundary markers (EN). */
116
+ /**
117
+ * 包裹快照(中)/ Wrap snapshot with boundary markers (EN).
118
+ *
119
+ * 作用(中)/ Purpose (EN):
120
+ * - 为快照加 `SNAPSHOT_START/END` 边界,便于后续正则定位。
121
+ * - 支持去重与旧快照剥离,防止多轮 token 累积。
122
+ * - 仅做纯字符串变换,不访问外部状态。
123
+ */
89
124
  function wrapSnapshot(snapshot) {
90
125
  return `${SNAPSHOT_START}\n${snapshot}\n${SNAPSHOT_END}`;
91
126
  }
@@ -101,6 +136,11 @@ function containsSnapshot(text) {
101
136
  }
102
137
  /**
103
138
  * 剥离旧快照(中)/ Strip outdated snapshot blocks from system prompt (EN).
139
+ *
140
+ * 说明(中)/ Notes (EN):
141
+ * - 当 prompt 中已有历史快照时,将其替换为过期占位文本。
142
+ * - 让每轮真正生效的只有“最新注入快照”,减少冲突上下文。
143
+ * - 这是 prompt 级清理;不会触碰 tool trace 中的原始结果对象。
104
144
  */
105
145
  function stripSnapshotFromPrompt(prompt) {
106
146
  if (!containsSnapshot(prompt)) return prompt;
@@ -1367,10 +1407,13 @@ function buildSystemPrompt(params = {}) {
1367
1407
  * 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
1368
1408
  * 运行环境:浏览器 Content Script。
1369
1409
  *
1370
- * 支持 12 种动作:
1410
+ * 支持 15 种动作:
1371
1411
  * click — 点击元素
1372
1412
  * fill — 填写可编辑控件(input/textarea/select/contenteditable)
1373
1413
  * select_option — 选择下拉框选项(value/label)
1414
+ * clear — 清空输入控件
1415
+ * check — 勾选 checkbox/radio
1416
+ * uncheck — 取消勾选 checkbox
1374
1417
  * type — 逐字符模拟键入
1375
1418
  * focus — 聚焦元素
1376
1419
  * hover — 鼠标悬停(触发 mouseenter/mouseover)
@@ -1448,16 +1491,62 @@ function resolveWaitMs(params) {
1448
1491
  * 模拟真实用户输入:触发 input、change 事件,兼容 React/Vue 等框架。
1449
1492
  */
1450
1493
  function dispatchInputEvents(el) {
1451
- el.dispatchEvent(new Event("input", {
1452
- bubbles: true,
1453
- cancelable: true
1454
- }));
1494
+ try {
1495
+ el.dispatchEvent(new InputEvent("input", {
1496
+ bubbles: true,
1497
+ cancelable: true,
1498
+ inputType: "insertText",
1499
+ data: null
1500
+ }));
1501
+ } catch {
1502
+ el.dispatchEvent(new Event("input", {
1503
+ bubbles: true,
1504
+ cancelable: true
1505
+ }));
1506
+ }
1455
1507
  el.dispatchEvent(new Event("change", {
1456
1508
  bubbles: true,
1457
1509
  cancelable: true
1458
1510
  }));
1459
1511
  }
1460
1512
  /**
1513
+ * 使用原生 setter 写入表单值,提升对受控组件(React/Vue 等)的兼容性。
1514
+ */
1515
/**
 * Write a form value through the native `value` setter declared on the
 * element's DOM prototype.
 *
 * Going through the prototype's setter bypasses any `value` property that has
 * been redefined on the instance itself, which is the usual reason plain
 * assignment is not picked up by controlled-component frameworks.
 *
 * @param {HTMLInputElement|HTMLTextAreaElement|HTMLSelectElement} el - Target control.
 *   Anything that is neither an input nor a textarea is treated as a select.
 * @param {string} value - Value to write.
 * @returns {void}
 */
function setNativeEditableValue(el, value) {
  let ownerPrototype;
  if (el instanceof HTMLInputElement) {
    ownerPrototype = HTMLInputElement.prototype;
  } else if (el instanceof HTMLTextAreaElement) {
    ownerPrototype = HTMLTextAreaElement.prototype;
  } else {
    ownerPrototype = HTMLSelectElement.prototype;
  }
  const nativeSetter = Object.getOwnPropertyDescriptor(ownerPrototype, "value")?.set;
  if (!nativeSetter) {
    // No accessor on the prototype: fall back to ordinary assignment.
    el.value = value;
    return;
  }
  nativeSetter.call(el, value);
}
1524
+ /**
1525
+ * 读取可编辑元素当前值。
1526
+ */
1527
/**
 * Read the current `value` of an editable element.
 *
 * @param {{ value?: string | null }} el - Element to read from.
 * @returns {string} The element's value, or an empty string when the value is
 *   `null` or `undefined`.
 */
function getEditableValue(el) {
  const current = el.value;
  if (current === null || current === void 0) return "";
  return current;
}
1530
+ /**
1531
+ * 将常见 key 映射为更接近浏览器语义的 KeyboardEvent.code。
1532
+ */
1533
/**
 * Map a common `KeyboardEvent.key` name to the matching `KeyboardEvent.code`.
 *
 * Only a small set of well-known keys (plus the aliases `Esc` and `" "`) is
 * translated; any other input is returned unchanged.
 *
 * The lookup is restricted to the table's own properties. A plain
 * `table[key]` would also resolve names inherited from `Object.prototype`
 * (`"constructor"`, `"toString"`, `"__proto__"`, ...) and hand a function or
 * object back to the caller instead of a string.
 *
 * @param {string} key - Key name as supplied by the caller.
 * @returns {string} The mapped code, or `key` itself when no mapping exists.
 */
function resolveKeyboardCode(key) {
  const codeByKey = {
    Enter: "Enter",
    Escape: "Escape",
    Esc: "Escape",
    Tab: "Tab",
    Space: "Space",
    " ": "Space",
    Backspace: "Backspace",
    Delete: "Delete",
    ArrowUp: "ArrowUp",
    ArrowDown: "ArrowDown",
    ArrowLeft: "ArrowLeft",
    ArrowRight: "ArrowRight"
  };
  const isKnownKey = Object.prototype.hasOwnProperty.call(codeByKey, key);
  return isKnownKey ? codeByKey[key] : key;
}
1549
+ /**
1461
1550
  * 生成元素的可读描述,用于在操作结果中展示实际命中的 DOM 节点。
1462
1551
  * 格式:<tag#id.class> "文本" [attr=val, ...]
1463
1552
  */
@@ -1465,7 +1554,7 @@ function describeElement(el) {
1465
1554
  const tag = el.tagName.toLowerCase();
1466
1555
  const id = el.id ? `#${el.id}` : "";
1467
1556
  const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
1468
- const text = el.textContent?.trim().slice(0, 40) ?? "";
1557
+ const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
1469
1558
  const textHint = text ? ` "${text}"` : "";
1470
1559
  const hints = [];
1471
1560
  for (const attr of [
@@ -1478,30 +1567,135 @@ function describeElement(el) {
1478
1567
  const val = el.getAttribute(attr);
1479
1568
  if (val) hints.push(`${attr}=${val}`);
1480
1569
  }
1570
+ if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
1481
1571
  return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
1482
1572
  }
1573
/**
 * Decide whether an element is currently rendered in a way a user could see.
 *
 * An element counts as visible when it is an HTML or SVG element, is
 * connected to the document, is not hidden through `display: none`,
 * `visibility: hidden` or `opacity: 0`, and has a bounding box with positive
 * width and height.
 *
 * @param {unknown} el - Candidate node.
 * @returns {boolean} `true` when every check above passes.
 */
function isElementVisible(el) {
  const isRenderable = el instanceof HTMLElement || el instanceof SVGElement;
  if (!isRenderable || !el.isConnected) return false;

  const computed = window.getComputedStyle(el);
  if (computed.display === "none") return false;
  if (computed.visibility === "hidden") return false;
  if (computed.opacity === "0") return false;

  const box = el.getBoundingClientRect();
  return box.width > 0 && box.height > 0;
}
1582
/**
 * Decide whether an element should be treated as disabled.
 *
 * Checks, in order:
 *  1. a `disabled` attribute on the element,
 *  2. `aria-disabled="true"`,
 *  3. a boolean `disabled` property that is `true`,
 *  4. the `:disabled` pseudo-class.
 *
 * Step 4 covers controls that are disabled by an ancestor rather than by
 * their own attribute, e.g. an input inside `<fieldset disabled>`: such a
 * control has no `disabled` attribute and its `disabled` property is `false`,
 * yet the browser refuses interaction with it.
 *
 * @param {unknown} el - Candidate node; anything that is not an `HTMLElement`
 *   is reported as not disabled.
 * @returns {boolean} `true` when any of the checks above marks the element as disabled.
 */
function isElementDisabled(el) {
  if (!(el instanceof HTMLElement)) return false;
  if (el.hasAttribute("disabled")) return true;
  if (el.getAttribute("aria-disabled") === "true") return true;
  if ("disabled" in el && typeof el.disabled === "boolean" && el.disabled) return true;
  // Inherited disabled state (disabled <fieldset>/<optgroup> ancestors).
  return typeof el.matches === "function" && el.matches(":disabled");
}
1589
/**
 * Decide whether an element accepts text/value editing.
 *
 * - `<textarea>`: editable unless `readOnly`.
 * - `<input>`: editable unless its type is one of checkbox, radio, file,
 *   button, submit or reset, or it is `readOnly`.
 * - `<select>`: always editable.
 * - any other `HTMLElement`: editable when `isContentEditable` is set.
 *
 * @param {unknown} el - Candidate node.
 * @returns {boolean} Whether the element can be edited.
 */
function isEditableElement(el) {
  if (el instanceof HTMLTextAreaElement) return !el.readOnly;

  if (el instanceof HTMLInputElement) {
    const nonTextTypes = ["checkbox", "radio", "file", "button", "submit", "reset"];
    if (nonTextTypes.includes(el.type)) return false;
    return !el.readOnly;
  }

  if (el instanceof HTMLSelectElement) return true;

  if (!(el instanceof HTMLElement)) return false;
  return el.isContentEditable;
}
1602
/**
 * Run the pre-flight actionability checks for a DOM action.
 *
 * Checks are applied in this order and the first failure is returned:
 *  1. the element must still be connected to the document;
 *  2. for every action except `get_text` / `get_attr`, it must be visible;
 *  3. for click, fill, type, press, select_option, clear, check and uncheck,
 *     it must not be disabled;
 *  4. for fill, type and clear, it must be an editable element.
 *
 * @param {Element} el - Resolved target element.
 * @param {string} action - Name of the DOM action about to run.
 * @param {string} selector - Selector used to find `el` (echoed in the result).
 * @returns {{content: string, details: {error: true, code: string, action: string, selector: string}} | null}
 *   A tool-result object describing the failure, or `null` when the element is actionable.
 */
function ensureActionable(el, action, selector) {
  // Every failure shares the same envelope; only the message and code vary.
  const failure = (content, code) => ({
    content,
    details: {
      error: true,
      code,
      action,
      selector
    }
  });

  if (!el.isConnected) {
    return failure(`"${selector}" 元素已脱离文档,无法执行 ${action}`, "ELEMENT_DETACHED");
  }

  const isReadOnlyAction = action === "get_text" || action === "get_attr";
  if (!isReadOnlyAction && !isElementVisible(el)) {
    return failure(`"${selector}" 元素不可见,无法执行 ${action}`, "ELEMENT_NOT_VISIBLE");
  }

  const requiresEnabled = [
    "click",
    "fill",
    "type",
    "press",
    "select_option",
    "clear",
    "check",
    "uncheck"
  ].includes(action);
  if (requiresEnabled && isElementDisabled(el)) {
    return failure(`"${selector}" 元素已禁用,无法执行 ${action}`, "ELEMENT_DISABLED");
  }

  const requiresEditable = action === "fill" || action === "type" || action === "clear";
  if (requiresEditable && !isEditableElement(el)) {
    return failure(`"${selector}" 不是可编辑元素,无法执行 ${action}`, "UNSUPPORTED_FILL_TARGET");
  }

  return null;
}
1654
/**
 * Decide whether a node is a usable dropdown-option candidate: it must be an
 * `HTMLElement`, pass `isElementVisible`, and have non-blank text content.
 *
 * @param {unknown} el - Candidate node.
 * @returns {boolean} `true` when the node is a visible element with text.
 */
function isOptionCandidateVisible(el) {
  if (!(el instanceof HTMLElement) || !isElementVisible(el)) return false;
  const label = el.textContent?.trim() ?? "";
  return label.length > 0;
}
1659
/**
 * Find a visible option-like element in the document by its text.
 *
 * Matching is case-insensitive on trimmed text. An exact match wins over a
 * substring match; among matches of the same kind the first one in document
 * order wins.
 *
 * The candidates are scanned once. Each candidate's visibility is evaluated
 * at most once (the check reads computed style and layout, so repeating it in
 * a second pass is wasted work), and the scan stops at the first exact match.
 *
 * @param {string} text - Option text to look for.
 * @returns {Element | null} The matching element, or `null` when `text` is
 *   blank or nothing visible matches.
 */
function findVisibleOptionByText(text) {
  const target = text.trim().toLowerCase();
  if (!target) return null;

  const candidates = document.querySelectorAll("[role=\"option\"], .bk-select-option, .bk-option, [data-option], li, option");
  let firstPartialMatch = null;
  for (const node of candidates) {
    if (!isOptionCandidateVisible(node)) continue;
    const label = node.textContent?.trim().toLowerCase() ?? "";
    if (label === target) return node;
    // Remember only the earliest substring match as the fallback.
    if (firstPartialMatch === null && label.includes(target)) firstPartialMatch = node;
  }
  return firstPartialMatch;
}
1483
1673
  function createDomTool() {
1484
1674
  return {
1485
1675
  name: "dom",
1486
1676
  description: [
1487
1677
  "Perform DOM operations on the current page.",
1488
- "Actions: click, fill, select_option, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
1678
+ "Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
1489
1679
  "Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
1490
1680
  ].join(" "),
1491
1681
  schema: Type.Object({
1492
- action: Type.String({ description: "DOM action: click | fill | select_option | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
1682
+ action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
1493
1683
  selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
1494
1684
  value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
1495
1685
  key: Type.Optional(Type.String({ description: "Key name for press action (e.g. Enter, Escape, Tab, ArrowDown, ArrowUp, Backspace, Delete, Space)" })),
1686
+ label: Type.Optional(Type.String({ description: "Label text for select_option action (fallback when value is not provided)" })),
1687
+ index: Type.Optional(Type.Number({ description: "0-based option index for select_option action" })),
1496
1688
  attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr actions" })),
1497
1689
  className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
1498
1690
  waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
1499
- waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." }))
1691
+ waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." })),
1692
+ force: Type.Optional(Type.Boolean({ description: "Skip actionability checks for interaction actions (default false)." }))
1500
1693
  }),
1501
1694
  execute: async (params) => {
1502
1695
  const action = params.action;
1503
1696
  const selector = params.selector;
1504
1697
  const waitMs = resolveWaitMs(params);
1698
+ const force = params.force === true;
1505
1699
  if (!selector) return { content: "缺少 selector 参数" };
1506
1700
  let el;
1507
1701
  if (waitMs > 0) {
@@ -1541,6 +1735,10 @@ function createDomTool() {
1541
1735
  el = elOrError;
1542
1736
  }
1543
1737
  try {
1738
+ if (!force) {
1739
+ const checkResult = ensureActionable(el, action, selector);
1740
+ if (checkResult) return checkResult;
1741
+ }
1544
1742
  switch (action) {
1545
1743
  case "click":
1546
1744
  if (el instanceof HTMLOptionElement) {
@@ -1554,6 +1752,22 @@ function createDomTool() {
1554
1752
  }
1555
1753
  if (el instanceof HTMLElement) {
1556
1754
  el.focus();
1755
+ el.dispatchEvent(new PointerEvent("pointerdown", {
1756
+ bubbles: true,
1757
+ cancelable: true
1758
+ }));
1759
+ el.dispatchEvent(new MouseEvent("mousedown", {
1760
+ bubbles: true,
1761
+ cancelable: true
1762
+ }));
1763
+ el.dispatchEvent(new PointerEvent("pointerup", {
1764
+ bubbles: true,
1765
+ cancelable: true
1766
+ }));
1767
+ el.dispatchEvent(new MouseEvent("mouseup", {
1768
+ bubbles: true,
1769
+ cancelable: true
1770
+ }));
1557
1771
  el.click();
1558
1772
  } else el.dispatchEvent(new MouseEvent("click", { bubbles: true }));
1559
1773
  return { content: `已点击 ${describeElement(el)}` };
@@ -1581,22 +1795,58 @@ function createDomTool() {
1581
1795
  if (el instanceof HTMLElement) el.focus();
1582
1796
  const eventInit = {
1583
1797
  key,
1584
- code: key,
1798
+ code: resolveKeyboardCode(key),
1585
1799
  bubbles: true,
1586
1800
  cancelable: true
1587
1801
  };
1588
- el.dispatchEvent(new KeyboardEvent("keydown", eventInit));
1802
+ const keydownAllowed = el.dispatchEvent(new KeyboardEvent("keydown", eventInit));
1589
1803
  el.dispatchEvent(new KeyboardEvent("keypress", eventInit));
1590
1804
  el.dispatchEvent(new KeyboardEvent("keyup", eventInit));
1805
+ if (keydownAllowed && key === "Enter") {
1806
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) (el.form ?? el.closest("form"))?.dispatchEvent(new Event("submit", {
1807
+ bubbles: true,
1808
+ cancelable: true
1809
+ }));
1810
+ }
1591
1811
  return { content: `已在 ${describeElement(el)} 上按下 ${key}` };
1592
1812
  }
1593
1813
  case "fill": {
1594
1814
  const value = params.value;
1595
1815
  if (value === void 0) return { content: "缺少 value 参数" };
1596
1816
  if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
1817
+ if (el instanceof HTMLInputElement) {
1818
+ if (new Set([
1819
+ "checkbox",
1820
+ "radio",
1821
+ "file",
1822
+ "button",
1823
+ "submit",
1824
+ "reset"
1825
+ ]).has(el.type)) return {
1826
+ content: `"${selector}" 为 input[type=${el.type}],不支持 fill;请使用 click/press/select_option 等动作。`,
1827
+ details: {
1828
+ error: true,
1829
+ code: "UNSUPPORTED_FILL_TARGET",
1830
+ action,
1831
+ selector
1832
+ }
1833
+ };
1834
+ }
1597
1835
  el.focus();
1598
- el.value = value;
1836
+ setNativeEditableValue(el, value);
1599
1837
  dispatchInputEvents(el);
1838
+ const actualValue = getEditableValue(el);
1839
+ if (actualValue !== value) return {
1840
+ content: `"${selector}" 填写后值不一致:期望 "${value}",实际 "${actualValue}"`,
1841
+ details: {
1842
+ error: true,
1843
+ code: "FILL_NOT_APPLIED",
1844
+ action,
1845
+ selector,
1846
+ expected: value,
1847
+ actual: actualValue
1848
+ }
1849
+ };
1600
1850
  } else if (el instanceof HTMLSelectElement) {
1601
1851
  el.focus();
1602
1852
  let matched = false;
@@ -1615,6 +1865,18 @@ function createDomTool() {
1615
1865
  }
1616
1866
  if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
1617
1867
  dispatchInputEvents(el);
1868
+ const actualValue = getEditableValue(el);
1869
+ if (actualValue !== el.value) return {
1870
+ content: `"${selector}" 下拉框状态异常,未确认写入`,
1871
+ details: {
1872
+ error: true,
1873
+ code: "FILL_NOT_APPLIED",
1874
+ action,
1875
+ selector,
1876
+ expected: value,
1877
+ actual: actualValue
1878
+ }
1879
+ };
1618
1880
  } else if (el instanceof HTMLElement && el.isContentEditable) {
1619
1881
  el.focus();
1620
1882
  el.textContent = value;
@@ -1624,27 +1886,83 @@ function createDomTool() {
1624
1886
  }
1625
1887
  case "select_option": {
1626
1888
  const value = params.value;
1627
- if (value === void 0) return { content: "缺少 value 参数" };
1628
- if (!(el instanceof HTMLSelectElement)) return { content: `"${selector}" 不是下拉框元素` };
1889
+ const label = params.label;
1890
+ const index = typeof params.index === "number" ? Math.floor(params.index) : void 0;
1891
+ if (value === void 0 && label === void 0 && index === void 0) return { content: "缺少可选参数:value 或 label 或 index" };
1892
+ if (!(el instanceof HTMLSelectElement)) {
1893
+ if (!(el instanceof HTMLElement)) return { content: `"${selector}" 不是下拉框元素` };
1894
+ el.focus();
1895
+ el.click();
1896
+ const wanted = (label ?? value ?? "").trim();
1897
+ if (!wanted) return { content: `"${selector}" 为自定义下拉时,需提供 value 或 label` };
1898
+ const option = findVisibleOptionByText(wanted);
1899
+ if (!option) return {
1900
+ content: `未找到与 "${wanted}" 匹配的可见下拉选项(自定义下拉)`,
1901
+ details: {
1902
+ error: true,
1903
+ code: "OPTION_NOT_FOUND",
1904
+ action,
1905
+ selector,
1906
+ wanted
1907
+ }
1908
+ };
1909
+ option.click();
1910
+ return { content: `已在自定义下拉中选择 "${wanted}"` };
1911
+ }
1629
1912
  el.focus();
1630
- let matched = false;
1631
- for (const option of Array.from(el.options)) if (option.value === value) {
1632
- el.value = option.value;
1633
- matched = true;
1634
- break;
1913
+ const options = Array.from(el.options);
1914
+ let selectedOption;
1915
+ if (value !== void 0) selectedOption = options.find((option) => option.value === value);
1916
+ if (!selectedOption && label !== void 0) {
1917
+ const normalizedLabel = label.trim().toLowerCase();
1918
+ selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedLabel);
1635
1919
  }
1636
- if (!matched) {
1637
- const normalized = value.trim().toLowerCase();
1638
- for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
1639
- el.value = option.value;
1640
- matched = true;
1641
- break;
1642
- }
1920
+ if (!selectedOption && value !== void 0) {
1921
+ const normalizedValueAsLabel = value.trim().toLowerCase();
1922
+ selectedOption = options.find((option) => option.text.trim().toLowerCase() === normalizedValueAsLabel);
1923
+ }
1924
+ if (!selectedOption && index !== void 0) {
1925
+ if (index < 0 || index >= options.length) return { content: `"${selector}" 下拉框不存在 index=${index} 的选项` };
1926
+ selectedOption = options[index];
1643
1927
  }
1644
- if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
1928
+ if (!selectedOption) return { content: `"${selector}" 下拉框中不存在选项 "${value ?? label ?? `index=${index}`}"` };
1929
+ if (selectedOption.disabled) return { content: `"${selector}" 目标选项已禁用:${selectedOption.value}` };
1930
+ if (!el.multiple) for (const option of options) option.selected = false;
1931
+ selectedOption.selected = true;
1932
+ el.value = selectedOption.value;
1645
1933
  dispatchInputEvents(el);
1646
- return { content: `已选择 ${describeElement(el)}: "${el.value}"` };
1934
+ return { content: `已选择 ${describeElement(el)}: value="${selectedOption.value}", label="${selectedOption.text.trim()}"` };
1647
1935
  }
1936
+ case "clear":
1937
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement) {
1938
+ el.focus();
1939
+ setNativeEditableValue(el, "");
1940
+ dispatchInputEvents(el);
1941
+ return { content: `已清空 ${describeElement(el)}` };
1942
+ }
1943
+ if (el instanceof HTMLElement && el.isContentEditable) {
1944
+ el.focus();
1945
+ el.textContent = "";
1946
+ el.dispatchEvent(new Event("input", { bubbles: true }));
1947
+ return { content: `已清空 ${describeElement(el)}` };
1948
+ }
1949
+ return { content: `"${selector}" 不是可清空元素` };
1950
+ case "check":
1951
+ if (!(el instanceof HTMLInputElement) || el.type !== "checkbox" && el.type !== "radio") return { content: `"${selector}" 不是 checkbox/radio` };
1952
+ el.focus();
1953
+ if (!el.checked) {
1954
+ el.checked = true;
1955
+ dispatchInputEvents(el);
1956
+ }
1957
+ return { content: `已勾选 ${describeElement(el)}` };
1958
+ case "uncheck":
1959
+ if (!(el instanceof HTMLInputElement) || el.type !== "checkbox") return { content: `"${selector}" 不是 checkbox` };
1960
+ el.focus();
1961
+ if (el.checked) {
1962
+ el.checked = false;
1963
+ dispatchInputEvents(el);
1964
+ }
1965
+ return { content: `已取消勾选 ${describeElement(el)}` };
1648
1966
  case "type": {
1649
1967
  const value = params.value;
1650
1968
  if (value === void 0) return { content: "缺少 value 参数" };
@@ -1897,6 +2215,12 @@ function generateSnapshot(root = document.body, options = {}) {
1897
2215
  if (val) attrs.push(`${attr}="${val}"`);
1898
2216
  }
1899
2217
  for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
2218
+ if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement || el instanceof HTMLSelectElement || el instanceof HTMLButtonElement) {
2219
+ if (el.disabled && !attrs.includes("disabled")) attrs.push("disabled");
2220
+ }
2221
+ if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.readOnly) {
2222
+ if (!attrs.includes("readonly")) attrs.push("readonly");
2223
+ }
1900
2224
  if (el.hasAttribute("onclick")) attrs.push("onclick");
1901
2225
  const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
1902
2226
  if (testId) attrs.push(`data-testid="${testId.slice(0, 25)}"`);
@@ -1904,6 +2228,13 @@ function generateSnapshot(root = document.body, options = {}) {
1904
2228
  const currentVal = el.value.slice(0, 40);
1905
2229
  if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
1906
2230
  }
2231
+ if (el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio") && el.checked) {
2232
+ if (!attrs.includes("checked")) attrs.push("checked");
2233
+ }
2234
+ if (el instanceof HTMLSelectElement && el.value) attrs.push(`val="${el.value.slice(0, 40)}"`);
2235
+ if (el instanceof HTMLOptionElement && el.selected) {
2236
+ if (!attrs.includes("selected")) attrs.push("selected");
2237
+ }
1907
2238
  let directText = "";
1908
2239
  for (let i = 0; i < el.childNodes.length; i++) {
1909
2240
  const node = el.childNodes[i];
@@ -2138,72 +2469,104 @@ function createNavigateTool() {
2138
2469
  * 替代 Playwright 的 waitForSelector/waitForNavigation。
2139
2470
  * 运行环境:浏览器 Content Script。
2140
2471
  *
2141
- * 支持 3 种动作:
2472
+ * 支持 4 种动作:
2142
2473
  * wait_for_selector — 等待匹配选择器的元素出现
2143
2474
  * wait_for_hidden — 等待元素消失或隐藏
2144
2475
  * wait_for_text — 等待页面中出现指定文本
2476
+ * wait_for_stable — 等待 DOM 在一段时间内无变化
2145
2477
  */
2146
2478
  /** 默认超时时间(毫秒) */
2147
2479
  const DEFAULT_TIMEOUT = 1e4;
2148
2480
  /**
2149
- * 通过 MutationObserver 等待元素出现。
2150
- * 先检查元素是否已存在,不存在则监听 DOM 变化直到出现或超时。
2481
+ * Playwright 风格可见性判定(近似)。
2482
+ */
2483
/**
 * Approximate visibility test used by the wait helpers.
 *
 * Returns `true` only for a connected HTML or SVG element whose computed
 * style is not `display: none`, `visibility: hidden` or `opacity: 0`, and
 * whose bounding box has positive width and height.
 *
 * @param {unknown} el - Candidate node.
 * @returns {boolean} Whether the node is considered visible.
 */
function isVisible(el) {
  const isRenderableNode = el instanceof HTMLElement || el instanceof SVGElement;
  if (!isRenderableNode) return false;
  if (!el.isConnected) return false;

  const computed = window.getComputedStyle(el);
  const hiddenByCss = computed.display === "none" || computed.visibility === "hidden" || computed.opacity === "0";
  if (hiddenByCss) return false;

  const { width, height } = el.getBoundingClientRect();
  return width > 0 && height > 0;
}
2492
+ /**
2493
+ * 读取 selector 当前状态。
2494
+ */
2495
/**
 * Evaluate whether the first element matching `selector` is currently in the
 * requested state.
 *
 * Supported states:
 *  - `attached`: an element matches the selector;
 *  - `visible`:  an element matches and `isVisible` accepts it;
 *  - `hidden`:   no element matches, or the match is not visible;
 *  - `detached`: no element matches.
 * Any other state yields `{ matched: false }` with no `element` key.
 *
 * The selector is queried before the state is inspected, so an invalid
 * selector throws from `document.querySelector` regardless of `state`.
 *
 * @param {string} selector - CSS selector to query.
 * @param {string} state - One of the states listed above.
 * @returns {{matched: boolean, element?: Element | undefined}} Match result
 *   plus the queried element (`undefined` when nothing matched).
 */
function evaluateSelectorState(selector, state) {
  const found = document.querySelector(selector);
  const element = found === null || found === void 0 ? void 0 : found;

  if (state === "attached") return { matched: Boolean(element), element };
  if (state === "visible") return { matched: Boolean(element && isVisible(element)), element };
  if (state === "hidden") return { matched: !element || !isVisible(element), element };
  if (state === "detached") return { matched: !element, element };
  return { matched: false };
}
2517
+ /**
2518
+ * 等待 selector 达到指定状态(近似 Playwright state 语义)。
2151
2519
  */
2152
- function waitForSelector(selector, timeoutMs) {
2520
+ function waitForSelectorState(selector, state, timeoutMs) {
2153
2521
  return new Promise((resolve, reject) => {
2154
- const existing = document.querySelector(selector);
2155
- if (existing) {
2156
- resolve(existing);
2157
- return;
2158
- }
2159
- const timer = setTimeout(() => {
2522
+ let finished = false;
2523
+ const finish = (handler) => {
2524
+ if (finished) return;
2525
+ finished = true;
2526
+ clearTimeout(timer);
2527
+ clearInterval(interval);
2160
2528
  observer.disconnect();
2161
- reject(/* @__PURE__ */ new Error(`等待 "${selector}" 超时 (${timeoutMs}ms)`));
2162
- }, timeoutMs);
2163
- const observer = new MutationObserver(() => {
2164
- const el = document.querySelector(selector);
2165
- if (el) {
2166
- clearTimeout(timer);
2167
- observer.disconnect();
2168
- resolve(el);
2529
+ handler();
2530
+ };
2531
+ const check = () => {
2532
+ let result;
2533
+ try {
2534
+ result = evaluateSelectorState(selector, state);
2535
+ } catch {
2536
+ finish(() => reject(/* @__PURE__ */ new Error(`选择器语法错误: ${selector}`)));
2537
+ return;
2169
2538
  }
2170
- });
2539
+ if (result.matched) finish(() => resolve({ element: result.element }));
2540
+ };
2541
+ const timer = setTimeout(() => {
2542
+ finish(() => reject(/* @__PURE__ */ new Error(`等待 "${selector}" 达到状态 "${state}" 超时 (${timeoutMs}ms)`)));
2543
+ }, timeoutMs);
2544
+ const interval = setInterval(check, 80);
2545
+ const observer = new MutationObserver(check);
2171
2546
  observer.observe(document.body, {
2172
2547
  childList: true,
2173
2548
  subtree: true,
2174
- attributes: true
2549
+ attributes: true,
2550
+ characterData: true
2175
2551
  });
2552
+ check();
2176
2553
  });
2177
2554
  }
2178
2555
  /**
2179
- * 等待元素消失或变为不可见。
2556
+ * 等待页面中出现指定文本。
2180
2557
  */
2181
- function waitForHidden(selector, timeoutMs) {
2558
+ function waitForText(text, timeoutMs) {
2182
2559
  return new Promise((resolve, reject) => {
2183
- const existing = document.querySelector(selector);
2184
- if (!existing) {
2185
- resolve();
2186
- return;
2187
- }
2188
- const style = window.getComputedStyle(existing);
2189
- if (style.display === "none" || style.visibility === "hidden") {
2560
+ if (document.body.textContent?.includes(text)) {
2190
2561
  resolve();
2191
2562
  return;
2192
2563
  }
2193
2564
  const timer = setTimeout(() => {
2194
2565
  observer.disconnect();
2195
- reject(/* @__PURE__ */ new Error(`等待 "${selector}" 消失超时 (${timeoutMs}ms)`));
2566
+ reject(/* @__PURE__ */ new Error(`等待文本 "${text}" 出现超时 (${timeoutMs}ms)`));
2196
2567
  }, timeoutMs);
2197
2568
  const observer = new MutationObserver(() => {
2198
- const el = document.querySelector(selector);
2199
- if (!el) {
2200
- clearTimeout(timer);
2201
- observer.disconnect();
2202
- resolve();
2203
- return;
2204
- }
2205
- const s = window.getComputedStyle(el);
2206
- if (s.display === "none" || s.visibility === "hidden") {
2569
+ if (document.body.textContent?.includes(text)) {
2207
2570
  clearTimeout(timer);
2208
2571
  observer.disconnect();
2209
2572
  resolve();
@@ -2212,40 +2575,40 @@ function waitForHidden(selector, timeoutMs) {
2212
2575
  observer.observe(document.body, {
2213
2576
  childList: true,
2214
2577
  subtree: true,
2215
- attributes: true,
2216
- attributeFilter: [
2217
- "style",
2218
- "class",
2219
- "hidden"
2220
- ]
2578
+ characterData: true
2221
2579
  });
2222
2580
  });
2223
2581
  }
2224
2582
  /**
2225
- * 等待页面中出现指定文本。
2583
+ * 等待页面进入稳定状态:在 quietMs 时间窗口内没有 DOM 变化。
2226
2584
  */
2227
- function waitForText(text, timeoutMs) {
2585
+ function waitForDomStable(timeoutMs, quietMs) {
2228
2586
  return new Promise((resolve, reject) => {
2229
- if (document.body.textContent?.includes(text)) {
2230
- resolve();
2231
- return;
2232
- }
2233
- const timer = setTimeout(() => {
2587
+ const startedAt = Date.now();
2588
+ let lastMutationAt = Date.now();
2589
+ const finish = (ok, err) => {
2590
+ clearInterval(tick);
2234
2591
  observer.disconnect();
2235
- reject(/* @__PURE__ */ new Error(`等待文本 "${text}" 出现超时 (${timeoutMs}ms)`));
2236
- }, timeoutMs);
2592
+ if (ok) resolve();
2593
+ else reject(err ?? /* @__PURE__ */ new Error("等待页面稳定失败"));
2594
+ };
2237
2595
  const observer = new MutationObserver(() => {
2238
- if (document.body.textContent?.includes(text)) {
2239
- clearTimeout(timer);
2240
- observer.disconnect();
2241
- resolve();
2242
- }
2596
+ lastMutationAt = Date.now();
2243
2597
  });
2244
2598
  observer.observe(document.body, {
2245
2599
  childList: true,
2246
2600
  subtree: true,
2601
+ attributes: true,
2247
2602
  characterData: true
2248
2603
  });
2604
+ const tick = setInterval(() => {
2605
+ const now = Date.now();
2606
+ if (now - startedAt > timeoutMs) {
2607
+ finish(false, /* @__PURE__ */ new Error(`等待页面稳定超时 (${timeoutMs}ms)`));
2608
+ return;
2609
+ }
2610
+ if (now - lastMutationAt >= quietMs) finish(true);
2611
+ }, 50);
2249
2612
  });
2250
2613
  }
2251
2614
  function createWaitTool() {
@@ -2254,13 +2617,15 @@ function createWaitTool() {
2254
2617
  description: [
2255
2618
  "Wait for DOM changes on the current page.",
2256
2619
  "Actions: wait_for_selector (element appears), wait_for_hidden (element disappears),",
2257
- "wait_for_text (specific text appears in page)."
2620
+ "wait_for_text (specific text appears in page), wait_for_stable (DOM stops changing)."
2258
2621
  ].join(" "),
2259
2622
  schema: Type.Object({
2260
- action: Type.String({ description: "Wait action: wait_for_selector | wait_for_hidden | wait_for_text" }),
2623
+ action: Type.String({ description: "Wait action: wait_for_selector | wait_for_hidden | wait_for_text | wait_for_stable" }),
2261
2624
  selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
2625
+ state: Type.Optional(Type.String({ description: "Selector state for wait_for_selector: attached | visible | hidden | detached (default: attached)" })),
2262
2626
  text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
2263
- timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" }))
2627
+ timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" })),
2628
+ quietMs: Type.Optional(Type.Number({ description: "Quiet window for wait_for_stable in milliseconds (default: 300)" }))
2264
2629
  }),
2265
2630
  execute: async (params) => {
2266
2631
  const action = params.action;
@@ -2270,14 +2635,25 @@ function createWaitTool() {
2270
2635
  case "wait_for_selector": {
2271
2636
  const selector = params.selector;
2272
2637
  if (!selector) return { content: "缺少 selector 参数" };
2273
- await waitForSelector(selector, timeoutMs);
2274
- return { content: `元素 "${selector}" 已出现` };
2638
+ const state = params.state ?? "attached";
2639
+ if (![
2640
+ "attached",
2641
+ "visible",
2642
+ "hidden",
2643
+ "detached"
2644
+ ].includes(state)) return { content: `无效 state: ${state}` };
2645
+ const result = await waitForSelectorState(selector, state, timeoutMs);
2646
+ if (state === "attached" || state === "visible") {
2647
+ const tag = result.element?.tagName?.toLowerCase();
2648
+ return { content: `元素 "${selector}" 已达到状态 "${state}"${tag ? ` (${tag})` : ""}` };
2649
+ }
2650
+ return { content: `元素 "${selector}" 已达到状态 "${state}"` };
2275
2651
  }
2276
2652
  case "wait_for_hidden": {
2277
2653
  const selector = params.selector;
2278
2654
  if (!selector) return { content: "缺少 selector 参数" };
2279
- await waitForHidden(selector, timeoutMs);
2280
- return { content: `元素 "${selector}" 已消失` };
2655
+ await waitForSelectorState(selector, "hidden", timeoutMs);
2656
+ return { content: `元素 "${selector}" 已隐藏或消失` };
2281
2657
  }
2282
2658
  case "wait_for_text": {
2283
2659
  const text = params.text;
@@ -2285,6 +2661,11 @@ function createWaitTool() {
2285
2661
  await waitForText(text, timeoutMs);
2286
2662
  return { content: `文本 "${text}" 已出现` };
2287
2663
  }
2664
+ case "wait_for_stable": {
2665
+ const quietMs = Math.max(50, Math.floor(params.quietMs ?? 300));
2666
+ await waitForDomStable(timeoutMs, quietMs);
2667
+ return { content: `页面已稳定(静默窗口 ${quietMs}ms)` };
2668
+ }
2288
2669
  default: return { content: `未知的等待动作: ${action}` };
2289
2670
  }
2290
2671
  } catch (err) {