agentpage 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +93 -19
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +464 -380
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -172,6 +172,7 @@ async function executeAgentLoop(params) {
|
|
|
172
172
|
}
|
|
173
173
|
}
|
|
174
174
|
if (tc.name === "dom" && pageContext.needsSnapshotBeforeDom) {
|
|
175
|
+
callbacks?.onBeforeRecoverySnapshot?.(pageContext.currentUrl);
|
|
175
176
|
const snapshotText = await readPageSnapshot(registry, 8);
|
|
176
177
|
pageContext.latestSnapshot = snapshotText;
|
|
177
178
|
pageContext.needsSnapshotBeforeDom = false;
|
|
@@ -224,6 +225,7 @@ async function executeAgentLoop(params) {
|
|
|
224
225
|
const recoveryWaitMs = resolveRecoveryWaitMs(tc.input);
|
|
225
226
|
if (attempts <= DEFAULT_ACTION_RECOVERY_ROUNDS) {
|
|
226
227
|
await sleep$1(recoveryWaitMs);
|
|
228
|
+
callbacks?.onBeforeRecoverySnapshot?.();
|
|
227
229
|
const snapshotText = await readPageSnapshot(registry, 8);
|
|
228
230
|
pageContext.latestSnapshot = snapshotText;
|
|
229
231
|
const originalError = toContentString(result.content);
|
|
@@ -791,7 +793,7 @@ var ToolRegistry = class {
|
|
|
791
793
|
*/
|
|
792
794
|
function buildSystemPrompt(params = {}) {
|
|
793
795
|
const sections = [];
|
|
794
|
-
sections.push("You are AutoPilot, an AI agent embedded in the user's web page.\nYou can
|
|
796
|
+
sections.push("You are AutoPilot, an AI agent embedded in the user's web page.\nYou can click, fill forms, read content, navigate, and execute JavaScript.\n\n## 操作规则\n\n1. 快照中每个元素末尾的 `#xxxx` 是 hash ID。操作时**必须**用 `#xxxx` 作为 dom 工具的 selector 参数。\n2. **禁止**猜测 CSS 选择器,只用快照中的 hash ID。\n3. 多个相似元素时,根据层级结构、所在功能区域和用户意图判断目标。\n4. 快照看不到目标时,先滚动页面或用 snapshot 获取更深层级。\n5. 破坏性操作前先与用户确认。");
|
|
795
797
|
const tools = params.tools ?? [];
|
|
796
798
|
if (tools.length > 0) {
|
|
797
799
|
const toolLines = tools.map((t) => `- **${t.name}**: ${t.description}`);
|
|
@@ -800,6 +802,271 @@ function buildSystemPrompt(params = {}) {
|
|
|
800
802
|
return sections.join("\n\n");
|
|
801
803
|
}
|
|
802
804
|
|
|
805
|
+
//#endregion
|
|
806
|
+
//#region src/web/dom-tool.ts
|
|
807
|
+
/**
|
|
808
|
+
* DOM Tool — 基于 Web API 的 DOM 操作工具。
|
|
809
|
+
*
|
|
810
|
+
* 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
|
|
811
|
+
* 运行环境:浏览器 Content Script。
|
|
812
|
+
*
|
|
813
|
+
* 支持 8 种动作:
|
|
814
|
+
* click — 点击元素
|
|
815
|
+
* fill — 填写输入框(清空后设值)
|
|
816
|
+
* type — 逐字符模拟键入
|
|
817
|
+
* get_text — 获取元素文本内容
|
|
818
|
+
* get_attr — 获取元素属性值
|
|
819
|
+
* set_attr — 设置元素属性
|
|
820
|
+
* add_class — 添加 CSS 类名
|
|
821
|
+
* remove_class — 移除 CSS 类名
|
|
822
|
+
*/
|
|
823
|
+
const DEFAULT_WAIT_MS = 1e3;
|
|
824
|
+
/** 当前活跃的 RefStore 实例(由 WebAgent 在 chat() 时设置) */
|
|
825
|
+
let activeRefStore;
|
|
826
|
+
function sleep(ms) {
|
|
827
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
828
|
+
}
|
|
829
|
+
/**
|
|
830
|
+
* 安全地查询 DOM 元素。
|
|
831
|
+
*
|
|
832
|
+
* 支持两种定位方式(优先级从高到低):
|
|
833
|
+
* - hash ID(以 "#" 开头且在 RefStore 中存在):确定性 hash 查找(最高效)
|
|
834
|
+
* - CSS 选择器(其他):传统 querySelector
|
|
835
|
+
*/
|
|
836
|
+
function queryElement(selector) {
|
|
837
|
+
try {
|
|
838
|
+
if (selector.startsWith("#") && activeRefStore) {
|
|
839
|
+
const id = selector.slice(1);
|
|
840
|
+
if (activeRefStore.has(id)) {
|
|
841
|
+
const el = activeRefStore.get(id);
|
|
842
|
+
if (!el) return `未找到 ref "${selector}" 对应的元素(可能已被移除或快照已过期)`;
|
|
843
|
+
return el;
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
const el = document.querySelector(selector);
|
|
847
|
+
if (!el) return `未找到匹配 "${selector}" 的元素`;
|
|
848
|
+
return el;
|
|
849
|
+
} catch (e) {
|
|
850
|
+
return `选择器语法错误: ${selector}`;
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
/**
|
|
854
|
+
* 设置当前活跃的 RefStore(由 WebAgent 在 chat 开始时调用)。
|
|
855
|
+
*/
|
|
856
|
+
function setActiveRefStore(store) {
|
|
857
|
+
activeRefStore = store;
|
|
858
|
+
}
|
|
859
|
+
/** 获取当前活跃的 RefStore(供其他工具复用) */
|
|
860
|
+
function getActiveRefStore() {
|
|
861
|
+
return activeRefStore;
|
|
862
|
+
}
|
|
863
|
+
/**
|
|
864
|
+
* 在给定超时时间内轮询查找元素。
|
|
865
|
+
* - 返回 Element:找到元素
|
|
866
|
+
* - 返回 string:选择器语法错误
|
|
867
|
+
* - 返回 null:超时未找到
|
|
868
|
+
*/
|
|
869
|
+
async function waitForElement(selector, timeoutMs) {
|
|
870
|
+
const start = Date.now();
|
|
871
|
+
while (Date.now() - start <= timeoutMs) {
|
|
872
|
+
const elOrError = queryElement(selector);
|
|
873
|
+
if (typeof elOrError !== "string") return elOrError;
|
|
874
|
+
if (elOrError.startsWith("选择器语法错误")) return elOrError;
|
|
875
|
+
await sleep(100);
|
|
876
|
+
}
|
|
877
|
+
return null;
|
|
878
|
+
}
|
|
879
|
+
function resolveWaitMs(params) {
|
|
880
|
+
const waitMs = params.waitMs;
|
|
881
|
+
if (typeof waitMs === "number" && Number.isFinite(waitMs)) return Math.max(0, Math.floor(waitMs));
|
|
882
|
+
const waitSeconds = params.waitSeconds;
|
|
883
|
+
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
884
|
+
return DEFAULT_WAIT_MS;
|
|
885
|
+
}
|
|
886
|
+
/**
|
|
887
|
+
* 模拟真实用户输入:触发 input、change 事件,兼容 React/Vue 等框架。
|
|
888
|
+
*/
|
|
889
|
+
function dispatchInputEvents(el) {
|
|
890
|
+
el.dispatchEvent(new Event("input", {
|
|
891
|
+
bubbles: true,
|
|
892
|
+
cancelable: true
|
|
893
|
+
}));
|
|
894
|
+
el.dispatchEvent(new Event("change", {
|
|
895
|
+
bubbles: true,
|
|
896
|
+
cancelable: true
|
|
897
|
+
}));
|
|
898
|
+
}
|
|
899
|
+
/**
|
|
900
|
+
* 生成元素的可读描述,用于在操作结果中展示实际命中的 DOM 节点。
|
|
901
|
+
* 格式:<tag#id.class> "文本" [attr=val, ...]
|
|
902
|
+
*/
|
|
903
|
+
function describeElement(el) {
|
|
904
|
+
const tag = el.tagName.toLowerCase();
|
|
905
|
+
const id = el.id ? `#${el.id}` : "";
|
|
906
|
+
const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
|
|
907
|
+
const text = el.textContent?.trim().slice(0, 40) ?? "";
|
|
908
|
+
const textHint = text ? ` "${text}"` : "";
|
|
909
|
+
const hints = [];
|
|
910
|
+
for (const attr of [
|
|
911
|
+
"type",
|
|
912
|
+
"name",
|
|
913
|
+
"placeholder",
|
|
914
|
+
"href",
|
|
915
|
+
"role"
|
|
916
|
+
]) {
|
|
917
|
+
const val = el.getAttribute(attr);
|
|
918
|
+
if (val) hints.push(`${attr}=${val}`);
|
|
919
|
+
}
|
|
920
|
+
return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
|
|
921
|
+
}
|
|
922
|
+
function createDomTool() {
|
|
923
|
+
return {
|
|
924
|
+
name: "dom",
|
|
925
|
+
description: [
|
|
926
|
+
"Perform DOM operations on the current page.",
|
|
927
|
+
"Actions: click, fill, type, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
928
|
+
"Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
|
|
929
|
+
].join(" "),
|
|
930
|
+
schema: Type.Object({
|
|
931
|
+
action: Type.String({ description: "DOM action: click | fill | type | get_text | get_attr | set_attr | add_class | remove_class" }),
|
|
932
|
+
selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
|
|
933
|
+
value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
|
|
934
|
+
attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr actions" })),
|
|
935
|
+
className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
|
|
936
|
+
waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
|
|
937
|
+
waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." }))
|
|
938
|
+
}),
|
|
939
|
+
execute: async (params) => {
|
|
940
|
+
const action = params.action;
|
|
941
|
+
const selector = params.selector;
|
|
942
|
+
const waitMs = resolveWaitMs(params);
|
|
943
|
+
if (!selector) return { content: "缺少 selector 参数" };
|
|
944
|
+
let el;
|
|
945
|
+
if (waitMs > 0) {
|
|
946
|
+
const found = await waitForElement(selector, waitMs);
|
|
947
|
+
if (typeof found === "string") return {
|
|
948
|
+
content: found,
|
|
949
|
+
details: {
|
|
950
|
+
error: true,
|
|
951
|
+
code: "INVALID_SELECTOR",
|
|
952
|
+
action,
|
|
953
|
+
selector
|
|
954
|
+
}
|
|
955
|
+
};
|
|
956
|
+
if (!found) return {
|
|
957
|
+
content: `未找到匹配 "${selector}" 的元素`,
|
|
958
|
+
details: {
|
|
959
|
+
error: true,
|
|
960
|
+
code: "ELEMENT_NOT_FOUND",
|
|
961
|
+
action,
|
|
962
|
+
selector,
|
|
963
|
+
waitMs
|
|
964
|
+
}
|
|
965
|
+
};
|
|
966
|
+
el = found;
|
|
967
|
+
} else {
|
|
968
|
+
const elOrError = queryElement(selector);
|
|
969
|
+
if (typeof elOrError === "string") return {
|
|
970
|
+
content: elOrError,
|
|
971
|
+
details: {
|
|
972
|
+
error: true,
|
|
973
|
+
code: elOrError.startsWith("未找到") ? "ELEMENT_NOT_FOUND" : "INVALID_SELECTOR",
|
|
974
|
+
action,
|
|
975
|
+
selector,
|
|
976
|
+
waitMs
|
|
977
|
+
}
|
|
978
|
+
};
|
|
979
|
+
el = elOrError;
|
|
980
|
+
}
|
|
981
|
+
try {
|
|
982
|
+
switch (action) {
|
|
983
|
+
case "click":
|
|
984
|
+
if (el instanceof HTMLElement) {
|
|
985
|
+
el.focus();
|
|
986
|
+
el.click();
|
|
987
|
+
} else el.dispatchEvent(new MouseEvent("click", { bubbles: true }));
|
|
988
|
+
return { content: `已点击 ${describeElement(el)}` };
|
|
989
|
+
case "fill": {
|
|
990
|
+
const value = params.value;
|
|
991
|
+
if (value === void 0) return { content: "缺少 value 参数" };
|
|
992
|
+
if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
|
|
993
|
+
el.focus();
|
|
994
|
+
el.value = value;
|
|
995
|
+
dispatchInputEvents(el);
|
|
996
|
+
} else if (el instanceof HTMLElement && el.isContentEditable) {
|
|
997
|
+
el.focus();
|
|
998
|
+
el.textContent = value;
|
|
999
|
+
el.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1000
|
+
} else return { content: `"${selector}" 不是可编辑元素` };
|
|
1001
|
+
return { content: `已填写 ${describeElement(el)}: "${value}"` };
|
|
1002
|
+
}
|
|
1003
|
+
case "type": {
|
|
1004
|
+
const value = params.value;
|
|
1005
|
+
if (value === void 0) return { content: "缺少 value 参数" };
|
|
1006
|
+
if (el instanceof HTMLElement) el.focus();
|
|
1007
|
+
for (const char of value) {
|
|
1008
|
+
el.dispatchEvent(new KeyboardEvent("keydown", {
|
|
1009
|
+
key: char,
|
|
1010
|
+
bubbles: true
|
|
1011
|
+
}));
|
|
1012
|
+
el.dispatchEvent(new KeyboardEvent("keypress", {
|
|
1013
|
+
key: char,
|
|
1014
|
+
bubbles: true
|
|
1015
|
+
}));
|
|
1016
|
+
if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) el.value += char;
|
|
1017
|
+
el.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1018
|
+
el.dispatchEvent(new KeyboardEvent("keyup", {
|
|
1019
|
+
key: char,
|
|
1020
|
+
bubbles: true
|
|
1021
|
+
}));
|
|
1022
|
+
}
|
|
1023
|
+
return { content: `已逐字输入到 ${describeElement(el)}: "${value}"` };
|
|
1024
|
+
}
|
|
1025
|
+
case "get_text": {
|
|
1026
|
+
const text = el.textContent?.trim() ?? "";
|
|
1027
|
+
return { content: `${describeElement(el)} 的文本内容:${text || "(空)"}` };
|
|
1028
|
+
}
|
|
1029
|
+
case "get_attr": {
|
|
1030
|
+
const attribute = params.attribute;
|
|
1031
|
+
if (!attribute) return { content: "缺少 attribute 参数" };
|
|
1032
|
+
const attrValue = el.getAttribute(attribute);
|
|
1033
|
+
return { content: `${describeElement(el)} 的 ${attribute} = ${attrValue ?? "(不存在)"}` };
|
|
1034
|
+
}
|
|
1035
|
+
case "set_attr": {
|
|
1036
|
+
const attribute = params.attribute;
|
|
1037
|
+
const value = params.value;
|
|
1038
|
+
if (!attribute || value === void 0) return { content: "缺少 attribute 或 value 参数" };
|
|
1039
|
+
el.setAttribute(attribute, value);
|
|
1040
|
+
return { content: `已设置 ${describeElement(el)} 的 ${attribute}="${value}"` };
|
|
1041
|
+
}
|
|
1042
|
+
case "add_class": {
|
|
1043
|
+
const className = params.className;
|
|
1044
|
+
if (!className) return { content: "缺少 className 参数" };
|
|
1045
|
+
el.classList.add(className);
|
|
1046
|
+
return { content: `已添加 class "${className}" 到 ${describeElement(el)}` };
|
|
1047
|
+
}
|
|
1048
|
+
case "remove_class": {
|
|
1049
|
+
const className = params.className;
|
|
1050
|
+
if (!className) return { content: "缺少 className 参数" };
|
|
1051
|
+
el.classList.remove(className);
|
|
1052
|
+
return { content: `已移除 ${describeElement(el)} 的 class "${className}"` };
|
|
1053
|
+
}
|
|
1054
|
+
default: return { content: `未知的 DOM 动作: ${action}` };
|
|
1055
|
+
}
|
|
1056
|
+
} catch (err) {
|
|
1057
|
+
return {
|
|
1058
|
+
content: `DOM 操作 "${action}" 失败: ${err instanceof Error ? err.message : String(err)}`,
|
|
1059
|
+
details: {
|
|
1060
|
+
error: true,
|
|
1061
|
+
action,
|
|
1062
|
+
selector
|
|
1063
|
+
}
|
|
1064
|
+
};
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
};
|
|
1068
|
+
}
|
|
1069
|
+
|
|
803
1070
|
//#endregion
|
|
804
1071
|
//#region src/web/page-info-tool.ts
|
|
805
1072
|
/**
|
|
@@ -819,27 +1086,19 @@ function buildSystemPrompt(params = {}) {
|
|
|
819
1086
|
/**
|
|
820
1087
|
* 生成页面 DOM 快照 — 将 DOM 树转为 AI 可理解的文本描述。
|
|
821
1088
|
*
|
|
822
|
-
*
|
|
823
|
-
*
|
|
824
|
-
*
|
|
825
|
-
* 每个元素自动生成基于层级位置的 XPath 引用(ref),
|
|
826
|
-
* AI 可以通过 ref 精确定位元素,无需猜测 CSS 选择器。
|
|
1089
|
+
* 基于 Web API 实现,只遍历可见元素,跳过 script/style/svg 等无意义节点。
|
|
1090
|
+
* 传入 RefStore 时,每个元素生成确定性 hash ID(如 #a1b2c),
|
|
1091
|
+
* AI 通过 hash ID 精确定位元素,无需猜测 CSS 选择器。
|
|
827
1092
|
*
|
|
828
1093
|
* 输出格式示例:
|
|
829
|
-
* [header]
|
|
830
|
-
* [nav]
|
|
831
|
-
* [a] "首页" href="/"
|
|
832
|
-
* [a] "关于" href="/about"
|
|
833
|
-
* [main]
|
|
834
|
-
* [h1] "
|
|
835
|
-
* [input] type="text" placeholder="搜索..."
|
|
836
|
-
* [button] "搜索" id="search-btn" onclick
|
|
837
|
-
*
|
|
838
|
-
* 增强信息:
|
|
839
|
-
* - id:元素的 id 属性
|
|
840
|
-
* - placeholder:输入框的占位文本
|
|
841
|
-
* - 事件绑定:onclick/onchange 等内联事件处理器
|
|
842
|
-
* - 状态属性:disabled/checked/readonly/required 等
|
|
1094
|
+
* [header] #k9f2a
|
|
1095
|
+
* [nav] #m3d7e
|
|
1096
|
+
* [a] "首页" href="/" #p1c4b
|
|
1097
|
+
* [a] "关于" href="/about" #q8e5f
|
|
1098
|
+
* [main] #r2a6d
|
|
1099
|
+
* [h1] "欢迎" #s7g3h
|
|
1100
|
+
* [input] type="text" placeholder="搜索..." #t4j8k
|
|
1101
|
+
* [button] "搜索" id="search-btn" onclick #u5n2m
|
|
843
1102
|
*
|
|
844
1103
|
* @param root - 快照根元素(默认 document.body)
|
|
845
1104
|
* @param options - 快照选项对象,或传入数字作为 maxDepth(向后兼容)
|
|
@@ -849,6 +1108,7 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
849
1108
|
const maxDepth = opts.maxDepth ?? 6;
|
|
850
1109
|
const viewportOnly = opts.viewportOnly ?? true;
|
|
851
1110
|
const pruneLayout = opts.pruneLayout ?? true;
|
|
1111
|
+
const refStore = opts.refStore;
|
|
852
1112
|
const SKIP_TAGS = new Set([
|
|
853
1113
|
"SCRIPT",
|
|
854
1114
|
"STYLE",
|
|
@@ -967,7 +1227,7 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
967
1227
|
if (elId) attrs.push(`id="${elId}"`);
|
|
968
1228
|
const className = el.getAttribute("class")?.trim();
|
|
969
1229
|
if (className) {
|
|
970
|
-
const classes = className.split(/\s+/).filter(
|
|
1230
|
+
const classes = className.split(/\s+/).filter((c) => c && !c.startsWith("data-v-") && c.length < 30).slice(0, 2).join(" ");
|
|
971
1231
|
if (classes) attrs.push(`class="${classes}"`);
|
|
972
1232
|
}
|
|
973
1233
|
for (const attr of INTERACTIVE_ATTRS) {
|
|
@@ -979,11 +1239,11 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
979
1239
|
for (const attrObj of Array.from(el.attributes)) if (attrObj.name.startsWith(EVENT_PREFIX)) events.push(attrObj.name);
|
|
980
1240
|
if (events.length > 0) attrs.push(`events=[${events.join(",")}]`);
|
|
981
1241
|
const dataAttrs = [];
|
|
982
|
-
for (const attrObj of Array.from(el.attributes)) if (attrObj.name.startsWith("data-") && dataAttrs.length <
|
|
1242
|
+
for (const attrObj of Array.from(el.attributes)) if (attrObj.name.startsWith("data-") && !attrObj.name.match(/^data-v-/) && dataAttrs.length < 2) dataAttrs.push(`${attrObj.name}="${attrObj.value.slice(0, 30)}"`);
|
|
983
1243
|
if (dataAttrs.length > 0) attrs.push(...dataAttrs);
|
|
984
1244
|
if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.value) {
|
|
985
1245
|
const currentVal = el.value.slice(0, 60);
|
|
986
|
-
if (el.getAttribute("value") !== currentVal) attrs.push(`
|
|
1246
|
+
if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
|
|
987
1247
|
}
|
|
988
1248
|
let directText = "";
|
|
989
1249
|
for (let i = 0; i < el.childNodes.length; i++) {
|
|
@@ -1003,9 +1263,12 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1003
1263
|
return childLines.join("\n");
|
|
1004
1264
|
}
|
|
1005
1265
|
let line = `${indent}[${tag}]`;
|
|
1006
|
-
if (directText) line += ` "${directText.slice(0,
|
|
1266
|
+
if (directText) line += ` "${directText.slice(0, 60)}"`;
|
|
1007
1267
|
if (attrs.length) line += ` ${attrs.join(" ")}`;
|
|
1008
|
-
|
|
1268
|
+
if (refStore) {
|
|
1269
|
+
const hashId = refStore.set(el, currentPath);
|
|
1270
|
+
line += ` #${hashId}`;
|
|
1271
|
+
} else line += ` ref="${currentPath}"`;
|
|
1009
1272
|
const lines = [line];
|
|
1010
1273
|
for (let i = 0; i < el.children.length; i++) {
|
|
1011
1274
|
const childResult = walk(el.children[i], depth + 1, currentPath);
|
|
@@ -1013,366 +1276,88 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1013
1276
|
}
|
|
1014
1277
|
return lines.join("\n");
|
|
1015
1278
|
}
|
|
1016
|
-
return walk(root, 0, "") || "(空页面)";
|
|
1017
|
-
}
|
|
1018
|
-
/**
|
|
1019
|
-
* 查询所有匹配元素并返回摘要信息(标签、文本、关键属性)。
|
|
1020
|
-
*/
|
|
1021
|
-
function queryAllElements(selector, limit = 20) {
|
|
1022
|
-
try {
|
|
1023
|
-
const elements = document.querySelectorAll(selector);
|
|
1024
|
-
if (elements.length === 0) return `未找到匹配 "${selector}" 的元素`;
|
|
1025
|
-
const results = [`找到 ${elements.length} 个元素:`];
|
|
1026
|
-
const count = Math.min(elements.length, limit);
|
|
1027
|
-
for (let i = 0; i < count; i++) {
|
|
1028
|
-
const el = elements[i];
|
|
1029
|
-
const tag = el.tagName.toLowerCase();
|
|
1030
|
-
const text = el.textContent?.trim().slice(0, 60) ?? "";
|
|
1031
|
-
const id = el.id ? `#${el.id}` : "";
|
|
1032
|
-
const cls = el.className && typeof el.className === "string" ? `.${el.className.split(" ").filter(Boolean).join(".")}` : "";
|
|
1033
|
-
results.push(` ${i + 1}. <${tag}${id}${cls}> "${text}"`);
|
|
1034
|
-
}
|
|
1035
|
-
if (elements.length > limit) results.push(` ...还有 ${elements.length - limit} 个元素`);
|
|
1036
|
-
return results.join("\n");
|
|
1037
|
-
} catch (e) {
|
|
1038
|
-
return `选择器语法错误: ${selector}`;
|
|
1039
|
-
}
|
|
1040
|
-
}
|
|
1041
|
-
function createPageInfoTool() {
|
|
1042
|
-
return {
|
|
1043
|
-
name: "page_info",
|
|
1044
|
-
description: [
|
|
1045
|
-
"Get information about the current page.",
|
|
1046
|
-
"Actions: get_url, get_title, get_selection (selected text),",
|
|
1047
|
-
"get_viewport (size & scroll), snapshot (DOM structure), query_all (find all matching elements)."
|
|
1048
|
-
].join(" "),
|
|
1049
|
-
schema: Type.Object({
|
|
1050
|
-
action: Type.String({ description: "Info action: get_url | get_title | get_selection | get_viewport | snapshot | query_all" }),
|
|
1051
|
-
selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
|
|
1052
|
-
maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
|
|
1053
|
-
viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
|
|
1054
|
-
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" }))
|
|
1055
|
-
}),
|
|
1056
|
-
execute: async (params) => {
|
|
1057
|
-
const action = params.action;
|
|
1058
|
-
try {
|
|
1059
|
-
switch (action) {
|
|
1060
|
-
case "get_url": return { content: window.location.href };
|
|
1061
|
-
case "get_title": return { content: document.title || "(无标题)" };
|
|
1062
|
-
case "get_selection": return { content: (window.getSelection()?.toString().trim() ?? "") || "(未选中任何文本)" };
|
|
1063
|
-
case "get_viewport": {
|
|
1064
|
-
const info = {
|
|
1065
|
-
viewportWidth: window.innerWidth,
|
|
1066
|
-
viewportHeight: window.innerHeight,
|
|
1067
|
-
scrollX: window.scrollX,
|
|
1068
|
-
scrollY: window.scrollY,
|
|
1069
|
-
pageWidth: document.documentElement.scrollWidth,
|
|
1070
|
-
pageHeight: document.documentElement.scrollHeight
|
|
1071
|
-
};
|
|
1072
|
-
return { content: JSON.stringify(info, null, 2) };
|
|
1073
|
-
}
|
|
1074
|
-
case "snapshot": {
|
|
1075
|
-
const maxDepth = params.maxDepth ?? 6;
|
|
1076
|
-
const viewportOnly = params.viewportOnly ?? true;
|
|
1077
|
-
const pruneLayout = params.pruneLayout ?? true;
|
|
1078
|
-
return { content: generateSnapshot(document.body, {
|
|
1079
|
-
maxDepth,
|
|
1080
|
-
viewportOnly,
|
|
1081
|
-
pruneLayout
|
|
1082
|
-
}) };
|
|
1083
|
-
}
|
|
1084
|
-
case "query_all": {
|
|
1085
|
-
const selector = params.selector;
|
|
1086
|
-
if (!selector) return { content: "缺少 selector 参数" };
|
|
1087
|
-
return { content: queryAllElements(selector) };
|
|
1088
|
-
}
|
|
1089
|
-
default: return { content: `未知的页面信息动作: ${action}` };
|
|
1090
|
-
}
|
|
1091
|
-
} catch (err) {
|
|
1092
|
-
return {
|
|
1093
|
-
content: `页面信息操作 "${action}" 失败: ${err instanceof Error ? err.message : String(err)}`,
|
|
1094
|
-
details: {
|
|
1095
|
-
error: true,
|
|
1096
|
-
action
|
|
1097
|
-
}
|
|
1098
|
-
};
|
|
1099
|
-
}
|
|
1100
|
-
}
|
|
1101
|
-
};
|
|
1102
|
-
}
|
|
1103
|
-
|
|
1104
|
-
//#endregion
|
|
1105
|
-
//#region src/web/dom-tool.ts
|
|
1106
|
-
/**
|
|
1107
|
-
* DOM Tool — 基于 Web API 的 DOM 操作工具。
|
|
1108
|
-
*
|
|
1109
|
-
* 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
|
|
1110
|
-
* 运行环境:浏览器 Content Script。
|
|
1111
|
-
*
|
|
1112
|
-
* 支持 8 种动作:
|
|
1113
|
-
* click — 点击元素
|
|
1114
|
-
* fill — 填写输入框(清空后设值)
|
|
1115
|
-
* type — 逐字符模拟键入
|
|
1116
|
-
* get_text — 获取元素文本内容
|
|
1117
|
-
* get_attr — 获取元素属性值
|
|
1118
|
-
* set_attr — 设置元素属性
|
|
1119
|
-
* add_class — 添加 CSS 类名
|
|
1120
|
-
* remove_class — 移除 CSS 类名
|
|
1121
|
-
*/
|
|
1122
|
-
const DEFAULT_WAIT_MS = 1e3;
|
|
1123
|
-
function sleep(ms) {
|
|
1124
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
1125
|
-
}
|
|
1126
|
-
/**
|
|
1127
|
-
* 通过快照 ref(XPath 路径)解析到 DOM 元素。
|
|
1128
|
-
*
|
|
1129
|
-
* ref 格式示例:/body/div[1]/main/form/input[2]
|
|
1130
|
-
* 每段为 tagName,可选 [n] 表示同标签兄弟中第 n 个(1-based)。
|
|
1131
|
-
*/
|
|
1132
|
-
function resolveRef(ref) {
|
|
1133
|
-
const segments = ref.split("/").filter(Boolean);
|
|
1134
|
-
let current = document.documentElement;
|
|
1135
|
-
for (let i = 0; i < segments.length; i++) {
|
|
1136
|
-
const seg = segments[i];
|
|
1137
|
-
if (!current) return null;
|
|
1138
|
-
const match = seg.match(/^([a-z0-9-]+)(?:\[(\d+)\])?$/i);
|
|
1139
|
-
if (!match) return null;
|
|
1140
|
-
const tag = match[1].toUpperCase();
|
|
1141
|
-
const index = match[2] ? parseInt(match[2], 10) : 1;
|
|
1142
|
-
if (i === 0 && current.tagName === tag) continue;
|
|
1143
|
-
const children = Array.from(current.children).filter((c) => c.tagName === tag);
|
|
1144
|
-
const sameTagCount = children.length;
|
|
1145
|
-
if (sameTagCount === 0) return null;
|
|
1146
|
-
if (sameTagCount === 1) current = children[0];
|
|
1147
|
-
else {
|
|
1148
|
-
if (index < 1 || index > sameTagCount) return null;
|
|
1149
|
-
current = children[index - 1];
|
|
1150
|
-
}
|
|
1151
|
-
}
|
|
1152
|
-
return current;
|
|
1153
|
-
}
|
|
1154
|
-
/**
|
|
1155
|
-
* 安全地查询 DOM 元素。
|
|
1156
|
-
*
|
|
1157
|
-
* 支持两种定位方式:
|
|
1158
|
-
* - ref 路径(以 "/" 开头):使用快照生成的 XPath 精确定位
|
|
1159
|
-
* - CSS 选择器(其他):传统 querySelector
|
|
1160
|
-
*/
|
|
1161
|
-
function queryElement(selector) {
|
|
1162
|
-
try {
|
|
1163
|
-
if (selector.startsWith("/")) {
|
|
1164
|
-
const el = resolveRef(selector);
|
|
1165
|
-
if (!el) return `未找到 ref "${selector}" 对应的元素`;
|
|
1166
|
-
return el;
|
|
1167
|
-
}
|
|
1168
|
-
const el = document.querySelector(selector);
|
|
1169
|
-
if (!el) return `未找到匹配 "${selector}" 的元素`;
|
|
1170
|
-
return el;
|
|
1171
|
-
} catch (e) {
|
|
1172
|
-
return `选择器语法错误: ${selector}`;
|
|
1173
|
-
}
|
|
1174
|
-
}
|
|
1175
|
-
/**
|
|
1176
|
-
* 在给定超时时间内轮询查找元素。
|
|
1177
|
-
* - 返回 Element:找到元素
|
|
1178
|
-
* - 返回 string:选择器语法错误
|
|
1179
|
-
* - 返回 null:超时未找到
|
|
1180
|
-
*/
|
|
1181
|
-
async function waitForElement(selector, timeoutMs) {
|
|
1182
|
-
const start = Date.now();
|
|
1183
|
-
while (Date.now() - start <= timeoutMs) {
|
|
1184
|
-
const elOrError = queryElement(selector);
|
|
1185
|
-
if (typeof elOrError !== "string") return elOrError;
|
|
1186
|
-
if (elOrError.startsWith("选择器语法错误")) return elOrError;
|
|
1187
|
-
await sleep(100);
|
|
1188
|
-
}
|
|
1189
|
-
return null;
|
|
1190
|
-
}
|
|
1191
|
-
function resolveWaitMs(params) {
|
|
1192
|
-
const waitMs = params.waitMs;
|
|
1193
|
-
if (typeof waitMs === "number" && Number.isFinite(waitMs)) return Math.max(0, Math.floor(waitMs));
|
|
1194
|
-
const waitSeconds = params.waitSeconds;
|
|
1195
|
-
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
1196
|
-
return DEFAULT_WAIT_MS;
|
|
1197
|
-
}
|
|
1198
|
-
/**
|
|
1199
|
-
* 模拟真实用户输入:触发 input、change 事件,兼容 React/Vue 等框架。
|
|
1200
|
-
*/
|
|
1201
|
-
function dispatchInputEvents(el) {
|
|
1202
|
-
el.dispatchEvent(new Event("input", {
|
|
1203
|
-
bubbles: true,
|
|
1204
|
-
cancelable: true
|
|
1205
|
-
}));
|
|
1206
|
-
el.dispatchEvent(new Event("change", {
|
|
1207
|
-
bubbles: true,
|
|
1208
|
-
cancelable: true
|
|
1209
|
-
}));
|
|
1279
|
+
return walk(root, 0, "") || "(空页面)";
|
|
1210
1280
|
}
|
|
1211
1281
|
/**
|
|
1212
|
-
*
|
|
1213
|
-
* 格式:<tag#id.class> "文本" [attr=val, ...]
|
|
1282
|
+
* 查询所有匹配元素并返回摘要信息(标签、文本、关键属性)。
|
|
1214
1283
|
*/
|
|
1215
|
-
function
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1284
|
+
function queryAllElements(selector, limit = 20) {
|
|
1285
|
+
try {
|
|
1286
|
+
const elements = document.querySelectorAll(selector);
|
|
1287
|
+
if (elements.length === 0) return `未找到匹配 "${selector}" 的元素`;
|
|
1288
|
+
const results = [`找到 ${elements.length} 个元素:`];
|
|
1289
|
+
const count = Math.min(elements.length, limit);
|
|
1290
|
+
for (let i = 0; i < count; i++) {
|
|
1291
|
+
const el = elements[i];
|
|
1292
|
+
const tag = el.tagName.toLowerCase();
|
|
1293
|
+
const text = el.textContent?.trim().slice(0, 60) ?? "";
|
|
1294
|
+
const id = el.id ? `#${el.id}` : "";
|
|
1295
|
+
const cls = el.className && typeof el.className === "string" ? `.${el.className.split(" ").filter(Boolean).join(".")}` : "";
|
|
1296
|
+
results.push(` ${i + 1}. <${tag}${id}${cls}> "${text}"`);
|
|
1297
|
+
}
|
|
1298
|
+
if (elements.length > limit) results.push(` ...还有 ${elements.length - limit} 个元素`);
|
|
1299
|
+
return results.join("\n");
|
|
1300
|
+
} catch (e) {
|
|
1301
|
+
return `选择器语法错误: ${selector}`;
|
|
1231
1302
|
}
|
|
1232
|
-
return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
|
|
1233
1303
|
}
|
|
1234
|
-
function
|
|
1304
|
+
function createPageInfoTool() {
|
|
1235
1305
|
return {
|
|
1236
|
-
name: "
|
|
1306
|
+
name: "page_info",
|
|
1237
1307
|
description: [
|
|
1238
|
-
"
|
|
1239
|
-
"Actions:
|
|
1240
|
-
"
|
|
1241
|
-
"CSS selectors are also supported but ref paths are preferred for accuracy."
|
|
1308
|
+
"Get information about the current page.",
|
|
1309
|
+
"Actions: get_url, get_title, get_selection (selected text),",
|
|
1310
|
+
"get_viewport (size & scroll), snapshot (DOM structure), query_all (find all matching elements)."
|
|
1242
1311
|
].join(" "),
|
|
1243
1312
|
schema: Type.Object({
|
|
1244
|
-
action: Type.String({ description: "
|
|
1245
|
-
selector: Type.String({ description: "
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
|
|
1250
|
-
waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." }))
|
|
1313
|
+
action: Type.String({ description: "Info action: get_url | get_title | get_selection | get_viewport | snapshot | query_all" }),
|
|
1314
|
+
selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
|
|
1315
|
+
maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
|
|
1316
|
+
viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
|
|
1317
|
+
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" }))
|
|
1251
1318
|
}),
|
|
1252
1319
|
execute: async (params) => {
|
|
1253
1320
|
const action = params.action;
|
|
1254
|
-
const selector = params.selector;
|
|
1255
|
-
const waitMs = resolveWaitMs(params);
|
|
1256
|
-
if (!selector) return { content: "缺少 selector 参数" };
|
|
1257
|
-
let el;
|
|
1258
|
-
if (waitMs > 0) {
|
|
1259
|
-
const found = await waitForElement(selector, waitMs);
|
|
1260
|
-
if (typeof found === "string") return {
|
|
1261
|
-
content: found,
|
|
1262
|
-
details: {
|
|
1263
|
-
error: true,
|
|
1264
|
-
code: "INVALID_SELECTOR",
|
|
1265
|
-
action,
|
|
1266
|
-
selector
|
|
1267
|
-
}
|
|
1268
|
-
};
|
|
1269
|
-
if (!found) return {
|
|
1270
|
-
content: `未找到匹配 "${selector}" 的元素`,
|
|
1271
|
-
details: {
|
|
1272
|
-
error: true,
|
|
1273
|
-
code: "ELEMENT_NOT_FOUND",
|
|
1274
|
-
action,
|
|
1275
|
-
selector,
|
|
1276
|
-
waitMs
|
|
1277
|
-
}
|
|
1278
|
-
};
|
|
1279
|
-
el = found;
|
|
1280
|
-
} else {
|
|
1281
|
-
const elOrError = queryElement(selector);
|
|
1282
|
-
if (typeof elOrError === "string") return {
|
|
1283
|
-
content: elOrError,
|
|
1284
|
-
details: {
|
|
1285
|
-
error: true,
|
|
1286
|
-
code: elOrError.startsWith("未找到") ? "ELEMENT_NOT_FOUND" : "INVALID_SELECTOR",
|
|
1287
|
-
action,
|
|
1288
|
-
selector,
|
|
1289
|
-
waitMs
|
|
1290
|
-
}
|
|
1291
|
-
};
|
|
1292
|
-
el = elOrError;
|
|
1293
|
-
}
|
|
1294
1321
|
try {
|
|
1295
1322
|
switch (action) {
|
|
1296
|
-
case "
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
} else if (el instanceof HTMLElement && el.isContentEditable) {
|
|
1310
|
-
el.focus();
|
|
1311
|
-
el.textContent = value;
|
|
1312
|
-
el.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1313
|
-
} else return { content: `"${selector}" 不是可编辑元素` };
|
|
1314
|
-
return { content: `已填写 ${describeElement(el)}: "${value}"` };
|
|
1315
|
-
}
|
|
1316
|
-
case "type": {
|
|
1317
|
-
const value = params.value;
|
|
1318
|
-
if (value === void 0) return { content: "缺少 value 参数" };
|
|
1319
|
-
if (el instanceof HTMLElement) el.focus();
|
|
1320
|
-
for (const char of value) {
|
|
1321
|
-
el.dispatchEvent(new KeyboardEvent("keydown", {
|
|
1322
|
-
key: char,
|
|
1323
|
-
bubbles: true
|
|
1324
|
-
}));
|
|
1325
|
-
el.dispatchEvent(new KeyboardEvent("keypress", {
|
|
1326
|
-
key: char,
|
|
1327
|
-
bubbles: true
|
|
1328
|
-
}));
|
|
1329
|
-
if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) el.value += char;
|
|
1330
|
-
el.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1331
|
-
el.dispatchEvent(new KeyboardEvent("keyup", {
|
|
1332
|
-
key: char,
|
|
1333
|
-
bubbles: true
|
|
1334
|
-
}));
|
|
1335
|
-
}
|
|
1336
|
-
return { content: `已逐字输入到 ${describeElement(el)}: "${value}"` };
|
|
1337
|
-
}
|
|
1338
|
-
case "get_text": {
|
|
1339
|
-
const text = el.textContent?.trim() ?? "";
|
|
1340
|
-
return { content: `${describeElement(el)} 的文本内容:${text || "(空)"}` };
|
|
1341
|
-
}
|
|
1342
|
-
case "get_attr": {
|
|
1343
|
-
const attribute = params.attribute;
|
|
1344
|
-
if (!attribute) return { content: "缺少 attribute 参数" };
|
|
1345
|
-
const attrValue = el.getAttribute(attribute);
|
|
1346
|
-
return { content: `${describeElement(el)} 的 ${attribute} = ${attrValue ?? "(不存在)"}` };
|
|
1347
|
-
}
|
|
1348
|
-
case "set_attr": {
|
|
1349
|
-
const attribute = params.attribute;
|
|
1350
|
-
const value = params.value;
|
|
1351
|
-
if (!attribute || value === void 0) return { content: "缺少 attribute 或 value 参数" };
|
|
1352
|
-
el.setAttribute(attribute, value);
|
|
1353
|
-
return { content: `已设置 ${describeElement(el)} 的 ${attribute}="${value}"` };
|
|
1323
|
+
case "get_url": return { content: window.location.href };
|
|
1324
|
+
case "get_title": return { content: document.title || "(无标题)" };
|
|
1325
|
+
case "get_selection": return { content: (window.getSelection()?.toString().trim() ?? "") || "(未选中任何文本)" };
|
|
1326
|
+
case "get_viewport": {
|
|
1327
|
+
const info = {
|
|
1328
|
+
viewportWidth: window.innerWidth,
|
|
1329
|
+
viewportHeight: window.innerHeight,
|
|
1330
|
+
scrollX: window.scrollX,
|
|
1331
|
+
scrollY: window.scrollY,
|
|
1332
|
+
pageWidth: document.documentElement.scrollWidth,
|
|
1333
|
+
pageHeight: document.documentElement.scrollHeight
|
|
1334
|
+
};
|
|
1335
|
+
return { content: JSON.stringify(info, null, 2) };
|
|
1354
1336
|
}
|
|
1355
|
-
case "
|
|
1356
|
-
const
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
return { content:
|
|
1337
|
+
case "snapshot": {
|
|
1338
|
+
const maxDepth = params.maxDepth ?? 6;
|
|
1339
|
+
const viewportOnly = params.viewportOnly ?? true;
|
|
1340
|
+
const pruneLayout = params.pruneLayout ?? true;
|
|
1341
|
+
return { content: generateSnapshot(document.body, {
|
|
1342
|
+
maxDepth,
|
|
1343
|
+
viewportOnly,
|
|
1344
|
+
pruneLayout,
|
|
1345
|
+
refStore: getActiveRefStore()
|
|
1346
|
+
}) };
|
|
1360
1347
|
}
|
|
1361
|
-
case "
|
|
1362
|
-
const
|
|
1363
|
-
if (!
|
|
1364
|
-
|
|
1365
|
-
return { content: `已移除 ${describeElement(el)} 的 class "${className}"` };
|
|
1348
|
+
case "query_all": {
|
|
1349
|
+
const selector = params.selector;
|
|
1350
|
+
if (!selector) return { content: "缺少 selector 参数" };
|
|
1351
|
+
return { content: queryAllElements(selector) };
|
|
1366
1352
|
}
|
|
1367
|
-
default: return { content:
|
|
1353
|
+
default: return { content: `未知的页面信息动作: ${action}` };
|
|
1368
1354
|
}
|
|
1369
1355
|
} catch (err) {
|
|
1370
1356
|
return {
|
|
1371
|
-
content:
|
|
1357
|
+
content: `页面信息操作 "${action}" 失败: ${err instanceof Error ? err.message : String(err)}`,
|
|
1372
1358
|
details: {
|
|
1373
1359
|
error: true,
|
|
1374
|
-
action
|
|
1375
|
-
selector
|
|
1360
|
+
action
|
|
1376
1361
|
}
|
|
1377
1362
|
};
|
|
1378
1363
|
}
|
|
@@ -1701,6 +1686,106 @@ function createEvaluateTool() {
|
|
|
1701
1686
|
};
|
|
1702
1687
|
}
|
|
1703
1688
|
|
|
1689
|
+
//#endregion
|
|
1690
|
+
//#region src/web/ref-store.ts
|
|
1691
|
+
/**
|
|
1692
|
+
* RefStore — 快照 hash ID 与 DOM 元素的映射表。
|
|
1693
|
+
*
|
|
1694
|
+
* 快照生成时,根据元素的 DOM 路径 + 页面 URL 生成确定性 hash ID,
|
|
1695
|
+
* 同时保存 ID → Element 的映射。AI 使用 hash ID 作为 selector 定位元素,
|
|
1696
|
+
* 免去超长 XPath 路径,大幅减少 token 消耗。
|
|
1697
|
+
*
|
|
1698
|
+
* 优势:
|
|
1699
|
+
* - **确定性**:同一元素无论快照顺序,始终得到相同 ID
|
|
1700
|
+
* - **并发安全**:多次快照不会产生 ID 冲突
|
|
1701
|
+
* - **跨页面隔离**:URL hash 作为命名空间,不同页面元素 ID 互不碰撞
|
|
1702
|
+
*
|
|
1703
|
+
* 生命周期:每次 WebAgent.chat() 调用时创建,对话结束后清空。
|
|
1704
|
+
*
|
|
1705
|
+
* 使用方:
|
|
1706
|
+
* page-info-tool.ts — generateSnapshot() 写入映射
|
|
1707
|
+
* dom-tool.ts — queryElement() 读取映射
|
|
1708
|
+
* index.ts — WebAgent 持有实例,管理生命周期
|
|
1709
|
+
*/
|
|
1710
|
+
/**
 * 32-bit FNV-1a string hash.
 *
 * Starts from the FNV offset basis, then for every UTF-16 code unit of the
 * input XORs the unit into the state and multiplies by the FNV prime using
 * 32-bit integer multiplication.
 *
 * Note: the whole code unit is mixed in at once (not individual bytes), so
 * results match byte-oriented FNV-1a implementations only for inputs whose
 * code units are all below 256.
 *
 * @param {string} str Text to hash.
 * @returns {number} The hash as an unsigned 32-bit integer.
 */
function fnv1a(str) {
	const FNV_OFFSET_BASIS = 0x811c9dc5;
	const FNV_PRIME = 0x01000193;
	let hash = FNV_OFFSET_BASIS;
	let index = 0;
	while (index < str.length) {
		// Math.imul keeps the product within 32 bits instead of losing precision as a double.
		hash = Math.imul(hash ^ str.charCodeAt(index), FNV_PRIME);
		index += 1;
	}
	// The bitwise ops above yield a signed value; reinterpret it as unsigned.
	return hash >>> 0;
}
|
|
1722
|
+
/**
 * Lookup table from snapshot hash IDs to DOM elements.
 *
 * - `set(el, path)` registers an element and returns its hash ID.
 * - `get(id)` returns the element stored under a hash ID.
 * - `has(id)` reports whether a hash ID is registered.
 * - `clear()` drops every mapping.
 * - `reset(url)` drops every mapping and can replace the URL namespace.
 * - `size` is the number of registered IDs.
 */
var RefStore = class {
	/** Hash ID → element mappings. */
	map = /* @__PURE__ */ new Map();
	/** String prepended to every path before hashing; acts as a per-page namespace. */
	urlKey;
	/**
	 * @param {string} [url] Page URL used as the hash namespace. When omitted
	 *   (or nullish) the namespace is the empty string.
	 */
	constructor(url) {
		this.urlKey = url ?? "";
	}
	/**
	 * Registers an element and returns its hash ID.
	 *
	 * The base ID is the base-36 form of the hash of `urlKey + path`. If that
	 * ID is already mapped to a different element, numeric suffixes starting
	 * at 2 are appended to the base ID until an ID is found that is either
	 * unused or already mapped to `el`.
	 *
	 * @param {Element} el Element to register.
	 * @param {string} path XPath-like path of the element (e.g. "/body/div[1]/main/button").
	 * @returns {string} The ID under which `el` is now stored.
	 */
	set(el, path) {
		const baseId = fnv1a(this.urlKey + path).toString(36);
		let candidate = baseId;
		for (let suffix = 2; this.map.has(candidate) && this.map.get(candidate) !== el; suffix += 1) {
			candidate = baseId + suffix;
		}
		this.map.set(candidate, el);
		return candidate;
	}
	/**
	 * Returns whatever is stored under `id`, or `undefined` when `id` is not registered.
	 *
	 * @param {string} id Hash ID previously returned by `set`.
	 */
	get(id) {
		const stored = this.map.get(id);
		return stored;
	}
	/**
	 * Reports whether `id` is currently registered.
	 *
	 * @param {string} id Hash ID to look up.
	 * @returns {boolean}
	 */
	has(id) {
		const registered = this.map.has(id);
		return registered;
	}
	/** Removes every mapping; the URL namespace is left untouched. */
	clear() {
		this.map.clear();
	}
	/**
	 * Removes every mapping and optionally switches the URL namespace, so that
	 * IDs generated afterwards are hashed against the new URL.
	 *
	 * @param {string} [url] New namespace. When `undefined`, the current one is kept.
	 */
	reset(url) {
		this.map.clear();
		if (url === void 0) return;
		this.urlKey = url;
	}
	/** Number of registered hash IDs. */
	get size() {
		const { size } = this.map;
		return size;
	}
};
|
|
1788
|
+
|
|
1704
1789
|
//#endregion
|
|
1705
1790
|
//#region src/web/messaging.ts
|
|
1706
1791
|
/**
|
|
@@ -1931,28 +2016,25 @@ var WebAgent = class {
|
|
|
1931
2016
|
async chat(message) {
|
|
1932
2017
|
const client = this.client ?? this.createBuiltinClient();
|
|
1933
2018
|
let systemPrompt = this.customSystemPrompt ?? buildSystemPrompt({ tools: this.registry.getDefinitions() });
|
|
2019
|
+
const refStore = new RefStore(globalThis.location?.href);
|
|
2020
|
+
setActiveRefStore(refStore);
|
|
1934
2021
|
if (this.autoSnapshot) try {
|
|
1935
2022
|
const snapshot = generateSnapshot(document.body, {
|
|
1936
2023
|
maxDepth: 8,
|
|
1937
|
-
...this.snapshotOptions
|
|
2024
|
+
...this.snapshotOptions,
|
|
2025
|
+
refStore
|
|
1938
2026
|
});
|
|
1939
2027
|
this.callbacks.onSnapshot?.(snapshot);
|
|
1940
|
-
systemPrompt +=
|
|
1941
|
-
"\n\n## 当前页面 DOM 快照(实时生成)\n",
|
|
1942
|
-
"每个元素末尾的 ref=\"...\" 是基于层级位置生成的唯一路径。",
|
|
1943
|
-
"操作元素时,必须使用 ref 路径作为 selector 参数(如 /body/main/form/input)。\n",
|
|
1944
|
-
"```",
|
|
1945
|
-
snapshot,
|
|
1946
|
-
"```\n",
|
|
1947
|
-
"## 操作规则\n",
|
|
1948
|
-
"1. 从快照中找到目标元素,复制其 ref 路径。",
|
|
1949
|
-
"2. 将 ref 路径作为 dom 工具的 selector 参数传入。",
|
|
1950
|
-
"3. 禁止猜测 CSS 选择器(如 \"button\"、\"#send\"),必须使用快照中的 ref。",
|
|
1951
|
-
"4. 如果快照中看不到目标元素,先滚动页面或调整 maxDepth 获取更深的快照。",
|
|
1952
|
-
"5. 先规划操作步骤,再按顺序逐步执行。",
|
|
1953
|
-
"6. **语义选择**:当页面存在多个文本相同的元素时,严禁随意选择。必须根据 ref 路径的父级结构、周围兄弟元素、所在功能区域来判断哪个才是用户意图中的目标。例如聊天区的\"发送\"和表单的\"提交\"是不同功能,要结合用户当前操作的上下文来选择正确的元素。"
|
|
1954
|
-
].join("\n");
|
|
2028
|
+
systemPrompt += `\n\n## 当前页面 DOM 快照\n\n\`\`\`\n${snapshot}\n\`\`\``;
|
|
1955
2029
|
} catch {}
|
|
2030
|
+
const wrappedCallbacks = {
|
|
2031
|
+
...this.callbacks,
|
|
2032
|
+
onBeforeRecoverySnapshot: (newUrl) => {
|
|
2033
|
+
if (newUrl !== void 0) refStore.reset(newUrl);
|
|
2034
|
+
else refStore.clear();
|
|
2035
|
+
this.callbacks.onBeforeRecoverySnapshot?.(newUrl);
|
|
2036
|
+
}
|
|
2037
|
+
};
|
|
1956
2038
|
const result = await executeAgentLoop({
|
|
1957
2039
|
client,
|
|
1958
2040
|
registry: this.registry,
|
|
@@ -1961,9 +2043,11 @@ var WebAgent = class {
|
|
|
1961
2043
|
history: this.memory ? this.history : void 0,
|
|
1962
2044
|
dryRun: this.dryRun,
|
|
1963
2045
|
maxRounds: this.maxRounds,
|
|
1964
|
-
callbacks:
|
|
2046
|
+
callbacks: wrappedCallbacks
|
|
1965
2047
|
});
|
|
1966
2048
|
if (this.memory) this.history = result.messages;
|
|
2049
|
+
refStore.clear();
|
|
2050
|
+
setActiveRefStore(void 0);
|
|
1967
2051
|
return result;
|
|
1968
2052
|
}
|
|
1969
2053
|
/**
|