agentpage 0.0.26 → 0.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -6
- package/dist/index.d.mts +23 -3
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +788 -278
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -10,7 +10,25 @@ const DEFAULT_MAX_ROUNDS = 40;
|
|
|
10
10
|
const DEFAULT_RECOVERY_WAIT_MS = 100;
|
|
11
11
|
const DEFAULT_ACTION_RECOVERY_ROUNDS = 2;
|
|
12
12
|
const DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2;
|
|
13
|
-
const DEFAULT_NOT_FOUND_RETRY_WAIT_MS =
|
|
13
|
+
const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 1e3;
|
|
14
|
+
const DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS = 4e3;
|
|
15
|
+
const DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS = 200;
|
|
16
|
+
/**
 * Default CSS selectors treated as "loading indicators" by the post-round
 * stability wait.
 *
 * Frozen because this is a shared module-level default: consumers are expected
 * to copy it (e.g. via spread) and append their own selectors, never to
 * mutate it in place.
 */
const DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS = Object.freeze([
	".ant-spin",
	".ant-spin-spinning",
	".ant-skeleton",
	".el-loading-mask",
	".bk-loading",
	".bk-spin-loading",
	".bk-skeleton",
	".bk-sideslider-loading",
	".t-loading",
	".t-skeleton",
	".t-skeleton__row",
	"[aria-busy=\"true\"]",
	".skeleton",
	".loading"
]);
|
|
14
32
|
/** 快照起始标记 — 用于在消息中识别快照边界 */
|
|
15
33
|
const SNAPSHOT_START = "<!-- SNAPSHOT_START -->";
|
|
16
34
|
/** 快照结束标记 */
|
|
@@ -20,15 +38,201 @@ const SNAPSHOT_OUTDATED = "[此快照已过期,请参考对话中最新的快
|
|
|
20
38
|
|
|
21
39
|
//#endregion
|
|
22
40
|
//#region src/core/agent-loop/helpers.ts
|
|
23
|
-
/**
|
|
41
|
+
/**
 * Asynchronous sleep.
 *
 * Used wherever the loop needs to pause, e.g. retry back-off or throttling.
 *
 * @param {number} ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep$1(ms) {
	return new Promise((wake) => {
		setTimeout(wake, ms);
	});
}
|
|
27
|
-
/**
|
|
49
|
+
/**
 * Normalize tool content into a string.
 *
 * Tool results may carry `content` as a string or as an arbitrary value; this
 * helper always returns a string so callers can safely log it, slice it, or
 * search it for error keywords.
 *
 * @param {unknown} content - Raw tool content.
 * @returns {string} The content itself when it is a string, an empty string
 *   for `undefined`, otherwise pretty-printed JSON (2-space indent). Values
 *   JSON cannot represent fall back to `String(content)`.
 */
function toContentString(content) {
	if (typeof content === "string") return content;
	// `JSON.stringify(undefined)` yields `undefined`, not a string; treat a
	// missing payload as empty text instead of leaking `undefined` to callers.
	if (content === void 0) return "";
	try {
		const serialized = JSON.stringify(content, null, 2);
		// Functions and symbols also serialize to `undefined`.
		return typeof serialized === "string" ? serialized : String(content);
	} catch {
		// Circular structures and BigInt make JSON.stringify throw.
		return String(content);
	}
}
|
|
31
|
-
/**
|
|
58
|
+
/**
 * Parse snapshot "expand children" hints out of model text.
 *
 * Expected line format: `SNAPSHOT_HINT: EXPAND_CHILDREN #ref1 #ref2`
 * (matched case-insensitively, one hint per line, any number of lines).
 *
 * @param {string} text - Raw model output.
 * @returns {string[]} Ref ids in order of appearance with the leading `#`
 *   removed; duplicates are kept. Empty array when `text` is falsy or holds
 *   no hint line.
 */
function parseSnapshotExpandHints(text) {
	if (!text) return [];
	const hintLine = /^\s*SNAPSHOT_HINT\s*:\s*EXPAND_CHILDREN\s+(.+)$/gim;
	const collected = [];
	for (const hit of String(text).matchAll(hintLine)) {
		const payload = hit[1] ?? "";
		// Only `#token` pieces count; any other text on the hint line is ignored.
		for (const [hashRef] of payload.matchAll(/#[A-Za-z0-9_-]+/g)) collected.push(hashRef.slice(1));
	}
	return collected;
}
|
|
76
|
+
/**
 * Extract the ref id from a pure hash selector.
 *
 * Only a bare hash selector such as `#1rv01x` qualifies. Compound CSS
 * (e.g. `.x #id`) yields `null` so it is never mistaken for a snapshot ref.
 *
 * @param {unknown} toolInput - Tool input; its `selector` property is read
 *   when the input is an object.
 * @returns {string | null} The id without `#`, or `null` when the input has
 *   no string selector or the selector is not a bare hash.
 */
function extractHashSelectorRef(toolInput) {
	const selector = toolInput && typeof toolInput === "object" ? toolInput.selector : void 0;
	if (typeof selector !== "string") return null;
	const hit = /^#([A-Za-z0-9_-]+)$/.exec(selector.trim());
	return hit === null ? null : hit[1];
}
|
|
89
|
+
/**
 * Build the task array for one round.
 *
 * Flattens the round's tool calls into stable `name:jsonInput` strings. The
 * result is used to echo "what the previous round did" back to the model and
 * to detect repeated batches.
 *
 * @param {Array<{ name: string, input: unknown }>} toolCalls - Tool calls of one round.
 * @returns {string[]} One `name:JSON` entry per tool call, in call order.
 */
function buildTaskArray(toolCalls) {
	const describeCall = ({ name, input }) => {
		const serializedInput = JSON.stringify(input);
		return `${name}:${serializedInput}`;
	};
	return toolCalls.map((call) => describeCall(call));
}
|
|
98
|
+
/**
 * Normalize model output into a short summary.
 *
 * Keeps the `REMAINING: ...` protocol tail when present; otherwise keeps only
 * the first paragraph, capped at 220 characters, so long free-form text does
 * not pollute the next round's context.
 *
 * @param {string | undefined | null} text - Raw model output.
 * @returns {string} Summary injected into the next round as the "previous
 *   round model output"; empty string when there is nothing to keep.
 */
function normalizeModelOutput(text) {
	const body = text ? text.trim() : "";
	if (body === "") return "";
	const protocol = /REMAINING\s*:\s*([\s\S]*)$/i.exec(body);
	if (protocol !== null) return `REMAINING: ${protocol[1].trim()}`;
	// Paragraphs are separated by a blank (whitespace-only) line.
	const [leadParagraph] = body.split(/\n\s*\n/);
	return (leadParagraph?.trim() ?? body).slice(0, 220);
}
|
|
113
|
+
/**
 * Parse the `REMAINING` protocol field from model text.
 *
 * This function only parses; fallback strategy is the caller's concern.
 *
 * @param {string | undefined | null} text - Raw model output.
 * @returns {string | null}
 *   - `""` when the model reported `REMAINING: DONE` (or an empty value),
 *   - a non-empty string holding the new remaining instruction,
 *   - `null` when the protocol marker is missing altogether.
 */
function parseRemainingInstruction(text) {
	const protocol = text ? text.match(/REMAINING\s*:\s*([\s\S]*)$/i) : null;
	if (protocol === null) return null;
	const remaining = protocol[1].trim();
	if (/^done$/i.test(remaining)) return "";
	return remaining;
}
|
|
130
|
+
/**
 * Derive the remaining instruction for the next round.
 *
 * Strategy:
 * - `REMAINING` protocol present -> adopt the model's value.
 * - protocol missing -> keep `currentInstruction` unchanged; the caller
 *   decides whether to advance it heuristically.
 *
 * @param {string | undefined | null} text - Raw model output of this round.
 * @param {string} currentInstruction - Remaining instruction before this round.
 * @returns {{ nextInstruction: string, hasRemainingProtocol: boolean }}
 */
function deriveNextInstruction(text, currentInstruction) {
	const fromProtocol = parseRemainingInstruction(text);
	const hasRemainingProtocol = fromProtocol !== null;
	return {
		nextInstruction: hasRemainingProtocol ? fromProtocol : currentInstruction,
		hasRemainingProtocol
	};
}
|
|
148
|
+
/**
 * Heuristically trim the remaining instruction.
 *
 * Used when the model omitted the `REMAINING` protocol but did execute
 * actions this round. The instruction is split on sequencing connectors
 * (`->`, `=>`, `→`, "then", "and then", "next", "after that", 然后, 接着,
 * 随后, 之后, 再) and the first step is dropped.
 *
 * This is a deliberately conservative strategy: exactly one step is consumed
 * per call no matter how large `executedCount` is, which keeps `remaining`
 * from staying frozen without over-trimming. Surviving steps are re-joined
 * with " 然后 ".
 *
 * English connectors are matched on word boundaries only, in line with the
 * guard check, so words that merely contain them ("authenticate",
 * "nextPage") are never cut apart.
 *
 * @param {string} currentInstruction - Current remaining instruction.
 * @param {number} executedCount - Number of actions executed this round.
 * @returns {string} The reduced instruction, or `currentInstruction`
 *   unchanged when nothing was executed or no multi-step structure is found.
 */
function reduceRemainingHeuristically(currentInstruction, executedCount) {
	// Non-string input cannot be split; hand it back untouched.
	if (typeof currentInstruction !== "string") return currentInstruction;
	if (!currentInstruction.trim() || executedCount <= 0) return currentInstruction;
	if (!/(->|=>|→|\bthen\b|\band then\b|\bnext\b|\bafter that\b|然后|接着|随后|之后|再)/i.test(currentInstruction)) return currentInstruction;
	const parts = currentInstruction
		.replace(/\s+/g, " ")
		.replace(/(->|=>|→)/g, " 然后 ")
		.split(/\s*(?:\b(?:and then|then|next|after that)\b|然后|接着|随后|之后|再)\s*/gi)
		.map((part) => part.trim())
		.filter(Boolean);
	if (parts.length <= 1) return currentInstruction;
	// Conservative advance: drop only the leading step.
	return parts.slice(1).join(" 然后 ");
}
|
|
165
|
+
/**
 * Decide whether a tool call must end the current round.
 *
 * Semantics: after an action that may restructure the DOM, stop and wait for
 * a fresh snapshot in the next round.
 *
 * Current rules:
 * - `navigate` with action goto / back / forward / reload -> break
 * - `dom.press` -> break only when the key is exactly `Enter`
 *   (read from `key`, falling back to `value`)
 * - `evaluate` -> break
 * - everything else -> no break
 *
 * @param {string} toolName - Tool name.
 * @param {unknown} toolInput - Tool input object.
 * @returns {boolean} `true` when the round should stop after this call.
 */
function shouldForceRoundBreak(toolName, toolInput) {
	const action = getToolAction(toolInput);
	switch (toolName) {
		case "navigate":
			return action === "goto" || action === "back" || action === "forward" || action === "reload";
		case "dom": {
			if (action !== "press") return false;
			const hasFields = typeof toolInput === "object" && toolInput !== null;
			const pressedKey = hasFields ? String(toolInput.key ?? toolInput.value ?? "") : "";
			return pressedKey === "Enter";
		}
		case "evaluate":
			return true;
		default:
			return false;
	}
}
|
|
185
|
+
/**
 * Decide whether an action may change page structure or state.
 *
 * Drives the post-round stability wait:
 * - `true`: after the round, wait for loading indicators and a quiet DOM
 * - `false`: skip the wait and move straight to the next round
 *
 * Every `navigate` and `evaluate` call counts as a potential mutation. For
 * `dom`, only the interactive / mutating actions listed below count.
 *
 * @param {string} toolName - Tool name.
 * @param {unknown} toolInput - Tool input object.
 * @returns {boolean} Whether the call may mutate the page.
 */
function isPotentialDomMutation(toolName, toolInput) {
	const action = getToolAction(toolInput);
	if (toolName === "navigate" || toolName === "evaluate") return true;
	if (toolName !== "dom" || !action) return false;
	switch (action) {
		case "click":
		case "fill":
		case "select_option":
		case "clear":
		case "check":
		case "uncheck":
		case "type":
		case "focus":
		case "hover":
		case "scroll":
		case "press":
		case "set_attr":
		case "add_class":
		case "remove_class":
			return true;
		default:
			return false;
	}
}
|
|
215
|
+
/**
 * Collect a task that failed because its element was not found.
 *
 * @param {string} name - Tool name.
 * @param {unknown} input - Tool input.
 * @param {{ content: unknown, details?: unknown }} result - Tool result.
 * @returns {{ name: string, input: unknown, reason: string } | null}
 *   `null` when the result is not an "element not found" failure; otherwise
 *   a descriptor for the not-found retry flow, with `reason` holding the
 *   first 240 characters of the result content.
 */
function collectMissingTask(name, input, result) {
	if (isElementNotFoundResult(result)) {
		const reason = toContentString(result.content).slice(0, 240);
		return {
			name,
			input,
			reason
		};
	}
	return null;
}
|
|
229
|
+
/**
|
|
230
|
+
* 元素不存在判定。
|
|
231
|
+
*
|
|
232
|
+
* 判定顺序:
|
|
233
|
+
* 1) 优先看结构化错误码 `ELEMENT_NOT_FOUND`
|
|
234
|
+
* 2) 回退看中文错误文本关键词(兼容历史结果格式)
|
|
235
|
+
*/
|
|
32
236
|
function isElementNotFoundResult(result) {
|
|
33
237
|
const details = result.details;
|
|
34
238
|
if (details && typeof details === "object") {
|
|
@@ -37,14 +241,19 @@ function isElementNotFoundResult(result) {
|
|
|
37
241
|
const content = toContentString(result.content);
|
|
38
242
|
return content.includes("未找到") && content.includes("元素");
|
|
39
243
|
}
|
|
40
|
-
/**
|
|
244
|
+
/**
 * Build a stable key for a tool call.
 *
 * Used as the map key for recovery attempts: the same tool name plus the
 * same serialized input is treated as the same call.
 *
 * @param {string} name - Tool name.
 * @param {unknown} input - Tool input, serialized with `JSON.stringify`
 *   (so property order is significant).
 * @returns {string} Key of the form `name:jsonInput`.
 */
function buildToolCallKey(name, input) {
	const serializedInput = JSON.stringify(input);
	return `${name}:${serializedInput}`;
}
|
|
44
252
|
/**
|
|
45
|
-
*
|
|
253
|
+
* 解析恢复等待时长。
|
|
46
254
|
* 优先级:waitMs > waitSeconds > 默认值。
|
|
47
|
-
*
|
|
255
|
+
*
|
|
256
|
+
* 统一返回毫秒整数,且最小为 0。
|
|
48
257
|
*/
|
|
49
258
|
function resolveRecoveryWaitMs(input) {
|
|
50
259
|
if (!input || typeof input !== "object") return DEFAULT_RECOVERY_WAIT_MS;
|
|
@@ -55,13 +264,21 @@ function resolveRecoveryWaitMs(input) {
|
|
|
55
264
|
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
56
265
|
return DEFAULT_RECOVERY_WAIT_MS;
|
|
57
266
|
}
|
|
58
|
-
/**
|
|
267
|
+
/**
 * Read the `action` field of a tool input.
 *
 * @param {unknown} input - Tool input.
 * @returns {string | undefined} The action when `input` is an object whose
 *   `action` is a string; `undefined` in every other case.
 */
function getToolAction(input) {
	const candidate = input && typeof input === "object" ? input.action : void 0;
	if (typeof candidate === "string") return candidate;
	return void 0;
}
|
|
64
|
-
/**
|
|
277
|
+
/**
 * Tell whether a tool result is flagged as an error.
 *
 * Convention: a truthy `result.details.error` marks an error result.
 *
 * @param {{ details?: unknown }} result - Tool result.
 * @returns {boolean} `true` when `details` is an object with a truthy `error`.
 */
function hasToolError(result) {
	const { details } = result;
	if (!details || typeof details !== "object") return false;
	return Boolean(details.error);
}
|
|
@@ -69,25 +286,24 @@ function hasToolError(result) {
|
|
|
69
286
|
//#endregion
|
|
70
287
|
//#region src/core/agent-loop/snapshot.ts
|
|
71
288
|
/**
|
|
72
|
-
*
|
|
289
|
+
* 读取页面快照。
|
|
73
290
|
*
|
|
74
291
|
* 默认关闭 viewportOnly,优先完整性。
|
|
75
|
-
* viewportOnly defaults to false to prioritize completeness.
|
|
76
292
|
*
|
|
77
|
-
*
|
|
293
|
+
* 步骤:
|
|
78
294
|
* 1) 合并调用方 options 与默认值(深度/裁剪/剪枝/节点上限等)。
|
|
79
295
|
* 2) 分发 `page_info.snapshot` 获取当前 DOM 文本快照。
|
|
80
296
|
* 3) 使用 `toContentString` 归一化输出,避免 provider 差异导致结构不一致。
|
|
81
297
|
* 4) 返回稳定字符串给 loop,供后续注入消息与统计。
|
|
82
298
|
*
|
|
83
|
-
*
|
|
299
|
+
* 默认参数意图:
|
|
84
300
|
* - `maxDepth=8`: 保留足够层级,减少关键控件被截断。
|
|
85
301
|
* - `viewportOnly=false`: 优先完整性,避免误判“元素不存在”。
|
|
86
302
|
* - `pruneLayout=true`: 抑制纯布局噪声,降低 token 压力。
|
|
87
303
|
* - `maxNodes=500` / `maxChildren=30`: 控制体积上限,兼顾可读性。
|
|
88
304
|
* - `maxTextLength=40`: 防止长文本淹没结构信息。
|
|
89
305
|
*
|
|
90
|
-
*
|
|
306
|
+
* 压缩/剪枝是怎么做的:
|
|
91
307
|
* - `viewportOnly=true` 时:仅保留与视口相交元素(根层容器保留),完全视口外元素跳过。
|
|
92
308
|
* - `pruneLayout=true` 时:无 id/无语义/无交互/无直接文本的布局容器会被“折叠”,
|
|
93
309
|
* 子节点直接提升输出,减少无意义层级;当同一折叠容器提升出多个相邻节点时,
|
|
@@ -98,10 +314,10 @@ function hasToolError(result) {
|
|
|
98
314
|
* - 交互优先排序:优先输出按钮/输入框/链接等交互元素,再输出普通元素。
|
|
99
315
|
* - 属性压缩:仅保留关键属性(如 id、关键 class、交互属性、布尔状态、val),减少冗余 token。
|
|
100
316
|
*
|
|
101
|
-
*
|
|
102
|
-
* -
|
|
103
|
-
* -
|
|
104
|
-
* -
|
|
317
|
+
* 输入/输出:
|
|
318
|
+
* - 输入:`ToolRegistry` + 可选快照参数
|
|
319
|
+
* - 输出:归一化后的快照字符串(始终 string)
|
|
320
|
+
* - 副作用:无本地状态写入;仅依赖工具调用结果
|
|
105
321
|
*/
|
|
106
322
|
async function readPageSnapshot(registry, options) {
|
|
107
323
|
return toContentString((await registry.dispatch("page_info", {
|
|
@@ -118,9 +334,9 @@ async function readPageSnapshot(registry, options) {
|
|
|
118
334
|
})).content);
|
|
119
335
|
}
|
|
120
336
|
/**
|
|
121
|
-
*
|
|
337
|
+
* 包裹快照。
|
|
122
338
|
*
|
|
123
|
-
*
|
|
339
|
+
* 作用:
|
|
124
340
|
* - 为快照加 `SNAPSHOT_START/END` 边界,便于后续正则定位。
|
|
125
341
|
* - 支持去重与旧快照剥离,防止多轮 token 累积。
|
|
126
342
|
* - 仅做纯字符串变换,不访问外部状态。
|
|
@@ -128,20 +344,20 @@ async function readPageSnapshot(registry, options) {
|
|
|
128
344
|
function wrapSnapshot(snapshot) {
	// Put each boundary marker on its own line around the snapshot text so the
	// block can be located again later.
	const opening = `${SNAPSHOT_START}\n`;
	const closing = `\n${SNAPSHOT_END}`;
	return `${opening}${snapshot}${closing}`;
}
|
|
131
|
-
/**
|
|
347
|
+
/**
 * Escape regular-expression metacharacters.
 *
 * @param {string} str - Literal text.
 * @returns {string} `str` with every regex metacharacter prefixed by a
 *   backslash, safe to embed in a `RegExp` source.
 */
function escapeRegex(str) {
	const metaChars = /[.*+?^${}()|[\]\\]/g;
	return str.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
135
|
-
/**
|
|
351
|
+
/** 快照块匹配正则。 */
|
|
136
352
|
const SNAPSHOT_REGEX = new RegExp(`${escapeRegex(SNAPSHOT_START)}[\\s\\S]*?${escapeRegex(SNAPSHOT_END)}`, "g");
|
|
137
|
-
/**
|
|
353
|
+
/**
 * Tell whether the text contains a complete snapshot block.
 *
 * A block counts only when the start marker is followed, somewhere later in
 * the text, by the end marker — the same shape the snapshot block regex
 * matches. A lone start marker, a lone end marker, or an end marker that
 * appears only before the start marker does not count.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when a start marker is followed by an end marker.
 */
function containsSnapshot(text) {
	const startAt = text.indexOf(SNAPSHOT_START);
	if (startAt === -1) return false;
	// Search for the end marker only after the start marker.
	return text.indexOf(SNAPSHOT_END, startAt + SNAPSHOT_START.length) !== -1;
}
|
|
141
357
|
/**
|
|
142
|
-
*
|
|
358
|
+
* 剥离旧快照。
|
|
143
359
|
*
|
|
144
|
-
*
|
|
360
|
+
* 说明:
|
|
145
361
|
* - 当 prompt 中已有历史快照时,将其替换为过期占位文本。
|
|
146
362
|
* - 让每轮真正生效的只有“最新注入快照”,减少冲突上下文。
|
|
147
363
|
* - 这是 prompt 级清理;不会触碰 tool trace 中的原始结果对象。
|
|
@@ -154,7 +370,15 @@ function stripSnapshotFromPrompt(prompt) {
|
|
|
154
370
|
//#endregion
|
|
155
371
|
//#region src/core/agent-loop/messages.ts
|
|
156
372
|
/**
|
|
157
|
-
* 显式 UI
|
|
373
|
+
* 显式 UI 意图判定。
|
|
374
|
+
*
|
|
375
|
+
* 用途:默认禁止模型操作 AutoPilot 自己的聊天 UI(输入框/发送按钮等),
|
|
376
|
+
* 只有当用户文本里“同时出现 UI 关键词 + 操作动词”时才放行。
|
|
377
|
+
*
|
|
378
|
+
* 判定逻辑:
|
|
379
|
+
* - `hasAgentUiKeyword`:是否提到聊天面板/输入框/发送按钮等
|
|
380
|
+
* - `hasActionVerb`:是否包含点击/输入/发送等动作意图
|
|
381
|
+
* - 二者都满足才返回 true
|
|
158
382
|
*/
|
|
159
383
|
function isExplicitAgentUiRequest(userMessage) {
|
|
160
384
|
const lower = userMessage.toLowerCase();
|
|
@@ -163,7 +387,12 @@ function isExplicitAgentUiRequest(userMessage) {
|
|
|
163
387
|
const hasActionVerb = /(press|click|type|fill|send|input|submit|enter)/i.test(lower) || /(输入|点击|发送|填写|填入|操作|提交|回车|按下)/.test(compact);
|
|
164
388
|
return hasAgentUiKeyword && hasActionVerb;
|
|
165
389
|
}
|
|
166
|
-
/**
|
|
390
|
+
/**
|
|
391
|
+
* 输入摘要。
|
|
392
|
+
*
|
|
393
|
+
* 把工具输入压缩成一段短文本(用于轨迹展示),
|
|
394
|
+
* 只保留高价值字段,避免日志过长。
|
|
395
|
+
*/
|
|
167
396
|
function formatToolInputBrief(input) {
|
|
168
397
|
if (!input || typeof input !== "object") return "";
|
|
169
398
|
const params = input;
|
|
@@ -185,7 +414,11 @@ function formatToolInputBrief(input) {
|
|
|
185
414
|
return ` (${parts.join(", ")})`;
|
|
186
415
|
}
|
|
187
416
|
/**
|
|
188
|
-
*
|
|
417
|
+
* 结果摘要。
|
|
418
|
+
*
|
|
419
|
+
* 读取工具结果首行,拼接错误码,生成一行可读结论:
|
|
420
|
+
* - 成功:`✓ ...`
|
|
421
|
+
* - 失败:`✗ ... [CODE]`
|
|
189
422
|
*/
|
|
190
423
|
function formatToolResultBrief(result) {
|
|
191
424
|
const firstLine = toContentString(result.content).split("\n").find((l) => l.trim())?.trim().slice(0, 80) ?? "";
|
|
@@ -196,15 +429,20 @@ function formatToolResultBrief(result) {
|
|
|
196
429
|
return `✓ ${firstLine}`;
|
|
197
430
|
}
|
|
198
431
|
/**
|
|
199
|
-
*
|
|
432
|
+
* 构建紧凑消息数组。
|
|
200
433
|
*
|
|
201
|
-
*
|
|
202
|
-
* Round
|
|
434
|
+
* 两种轮次语义:
|
|
435
|
+
* - Round 0:发送“初始任务 + 当前快照 + 执行约束”
|
|
436
|
+
* - Round 1+:发送“已完成步骤 + 当前 remaining + 最新快照”
|
|
203
437
|
*
|
|
204
|
-
*
|
|
438
|
+
* 渐进式语义:
|
|
205
439
|
* - `remainingInstruction`:当前轮次仍待执行的文本。
|
|
206
440
|
* - `previousRoundTasks`:上一轮已执行的任务数组,避免重复计划。
|
|
207
|
-
* -
|
|
441
|
+
* - `previousRoundModelOutput`:上一轮模型输出摘要,用于 task-reduction 输入。
|
|
442
|
+
* - `previousRoundPlannedTasks`:上一轮计划数组,用于对齐“计划 vs 实际执行”。
|
|
443
|
+
* - `protocolViolationHint`:协议修复提示(当 remaining 未完成但模型无动作时)。
|
|
444
|
+
*
|
|
445
|
+
* 输出:符合 AIMessage 结构的消息数组,可直接传给 AIClient.chat。
|
|
208
446
|
*/
|
|
209
447
|
function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint) {
|
|
210
448
|
const messages = history ? [...history] : [];
|
|
@@ -241,26 +479,10 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
241
479
|
content: `Done steps (do NOT repeat):\n${traceParts.join("\n")}`
|
|
242
480
|
});
|
|
243
481
|
const hasErrors = trace.some((e) => hasToolError(e.result));
|
|
244
|
-
const
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"",
|
|
249
|
-
"Task-reduction model:",
|
|
250
|
-
"Input: current remaining instruction + previous round executed actions + this-round actions.",
|
|
251
|
-
"Output: new remaining instruction after removing this-round actions.",
|
|
252
|
-
"Start from visible page state directly. Do NOT restate task. Do NOT output planning text.",
|
|
253
|
-
"Execute all independent visible sub-tasks in one round.",
|
|
254
|
-
"Do NOT act on elements not present in this snapshot yet.",
|
|
255
|
-
"If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
|
|
256
|
-
"Do NOT call page_info (get_url/get_title/query_all/snapshot).",
|
|
257
|
-
"For dropdown/select fields, use dom with action=select_option (or fill on a select).",
|
|
258
|
-
"If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.",
|
|
259
|
-
"Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.",
|
|
260
|
-
"For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.",
|
|
261
|
-
"Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.",
|
|
262
|
-
allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content."
|
|
263
|
-
];
|
|
482
|
+
const needsMasterGoalAnchor = activeInstruction.trim().toLowerCase() !== userMessage.trim().toLowerCase();
|
|
483
|
+
const contextParts = ["## Execution context"];
|
|
484
|
+
if (needsMasterGoalAnchor) contextParts.push(`Master goal (reference only — do NOT restart from scratch):`, userMessage, "");
|
|
485
|
+
contextParts.push("Current remaining instruction:", activeInstruction, "", "Task-reduction model:", "Input: current remaining instruction + previous round executed actions + this-round actions.", "Output: new remaining instruction after removing this-round actions.", "Start from visible page state directly. Do NOT restate task. Do NOT output planning text.", "Execute all independent visible sub-tasks in one round.", "Do NOT act on elements not present in this snapshot yet.", "If action changes DOM (open modal/navigate), stop after that batch and continue next round.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", "If a needed list shows `... (N children omitted)` under a specific container, output `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>` and wait for next round snapshot.", "Build the minimal action array from current snapshot to finish this remaining instruction in one round whenever possible.", "For deterministic increase/decrease controls, compute delta from current visible value and issue exactly that many clicks in one round (e.g., +2 => two increase clicks). Do not overshoot then undo.", "Stop rule: once requested state is reached, stop tool calls. If verification is needed, verify once and then output REMAINING: DONE.", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.");
|
|
264
486
|
if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
|
|
265
487
|
else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
|
|
266
488
|
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
@@ -284,7 +506,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
284
506
|
|
|
285
507
|
//#endregion
|
|
286
508
|
//#region src/core/agent-loop/recovery.ts
|
|
287
|
-
/** 冗余 page_info
|
|
509
|
+
/** 冗余 page_info 动作集合。 */
|
|
288
510
|
const REDUNDANT_PAGE_INFO_ACTIONS = new Set([
|
|
289
511
|
"snapshot",
|
|
290
512
|
"query_all",
|
|
@@ -293,7 +515,14 @@ const REDUNDANT_PAGE_INFO_ACTIONS = new Set([
|
|
|
293
515
|
"get_viewport"
|
|
294
516
|
]);
|
|
295
517
|
/**
|
|
296
|
-
* 冗余 page_info
|
|
518
|
+
* 冗余 page_info 检查。
|
|
519
|
+
*
|
|
520
|
+
* 场景:模型在 loop 中频繁请求 page_info,导致“只看不做”。
|
|
521
|
+
* 处理:命中白名单动作时直接返回拦截结果,不真正执行工具。
|
|
522
|
+
*
|
|
523
|
+
* 示例:
|
|
524
|
+
* - 输入:`page_info.snapshot`
|
|
525
|
+
* - 输出:`REDUNDANT_PAGE_INFO_SKIPPED`
|
|
297
526
|
*/
|
|
298
527
|
function checkRedundantSnapshot(toolName, toolInput, _latestSnapshot, round) {
|
|
299
528
|
if (toolName !== "page_info") return null;
|
|
@@ -309,7 +538,14 @@ function checkRedundantSnapshot(toolName, toolInput, _latestSnapshot, round) {
|
|
|
309
538
|
return null;
|
|
310
539
|
}
|
|
311
540
|
/**
|
|
312
|
-
*
|
|
541
|
+
* 快照防抖。
|
|
542
|
+
*
|
|
543
|
+
* 规则:连续触发 `page_info.snapshot` 时,第 2 次起标记为冗余,
|
|
544
|
+
* 返回 `REDUNDANT_SNAPSHOT`,提醒模型直接使用已有快照继续执行。
|
|
545
|
+
*
|
|
546
|
+
* 返回值:
|
|
547
|
+
* - `result`:可能被替换成防抖后的结果
|
|
548
|
+
* - `consecutiveCount`:更新后的连续 snapshot 计数
|
|
313
549
|
*/
|
|
314
550
|
function applySnapshotDebounce(toolName, toolInput, result, consecutiveCount) {
|
|
315
551
|
if (toolName === "page_info" && getToolAction(toolInput) === "snapshot") {
|
|
@@ -336,10 +572,18 @@ function applySnapshotDebounce(toolName, toolInput, result, consecutiveCount) {
|
|
|
336
572
|
};
|
|
337
573
|
}
|
|
338
574
|
/**
|
|
339
|
-
*
|
|
575
|
+
* 元素未找到恢复。
|
|
576
|
+
*
|
|
577
|
+
* 触发条件:
|
|
578
|
+
* - 工具是 `dom`
|
|
579
|
+
* - 结果被识别为“元素未找到”
|
|
580
|
+
*
|
|
581
|
+
* 处理流程:
|
|
582
|
+
* 1) 按调用键统计恢复次数(同 name + input 视为同一调用)
|
|
583
|
+
* 2) 在上限内:等待 -> 刷新快照 -> 返回 `ELEMENT_NOT_FOUND_RECOVERY`
|
|
584
|
+
* 3) 超过上限:返回 `ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED`
|
|
340
585
|
*
|
|
341
|
-
*
|
|
342
|
-
* Auto-recovers for initial attempts, then returns max-recovery signal.
|
|
586
|
+
* 说明:函数只返回“恢复后的结果描述”,是否继续下一轮由主循环决定。
|
|
343
587
|
*/
|
|
344
588
|
async function handleElementRecovery(toolName, toolInput, result, recoveryAttempts, registry, pageContext, callbacks) {
|
|
345
589
|
if (toolName !== "dom" || !isElementNotFoundResult(result)) return null;
|
|
@@ -371,7 +615,12 @@ async function handleElementRecovery(toolName, toolInput, result, recoveryAttemp
|
|
|
371
615
|
}
|
|
372
616
|
};
|
|
373
617
|
}
|
|
374
|
-
/**
|
|
618
|
+
/**
|
|
619
|
+
* 导航后快照刷新。
|
|
620
|
+
*
|
|
621
|
+
* 当 `navigate.goto/back/forward/reload` 成功后,立即刷新快照,
|
|
622
|
+
* 防止后续动作还在旧页面上下文里决策。
|
|
623
|
+
*/
|
|
375
624
|
async function handleNavigationUrlChange(toolName, toolInput, result, registry, pageContext, callbacks) {
|
|
376
625
|
if (toolName !== "navigate") return;
|
|
377
626
|
const action = getToolAction(toolInput);
|
|
@@ -386,7 +635,15 @@ const READ_ONLY_TOOLS = new Set(["page_info"]);
|
|
|
386
635
|
const READ_ONLY_DOM_ACTIONS = new Set(["get_text", "get_attr"]);
|
|
387
636
|
/**
|
|
388
637
|
* 空转检测:识别连续只读轮次并终止。
|
|
389
|
-
*
|
|
638
|
+
*
|
|
639
|
+
* 判定口径:
|
|
640
|
+
* - `page_info.*` 视为只读
|
|
641
|
+
* - `dom.get_text/get_attr` 视为只读
|
|
642
|
+
*
|
|
643
|
+
* 返回值语义:
|
|
644
|
+
* - `-1`:触发停机(连续 2 轮纯只读)
|
|
645
|
+
* - `0`:本轮有实质操作,计数清零
|
|
646
|
+
* - `>0`:当前连续只读轮次
|
|
390
647
|
*/
|
|
391
648
|
function detectIdleLoop(toolCalls, consecutiveReadOnlyRounds) {
|
|
392
649
|
if (toolCalls.length > 0 && toolCalls.every(({ name, input }) => {
|
|
@@ -404,32 +661,76 @@ function detectIdleLoop(toolCalls, consecutiveReadOnlyRounds) {
|
|
|
404
661
|
//#endregion
|
|
405
662
|
//#region src/core/agent-loop/index.ts
|
|
406
663
|
/**
|
|
407
|
-
* Agent Loop
|
|
408
|
-
*
|
|
409
|
-
* 负责消息构建、AI 决策、工具执行、恢复保护与指标汇总。
|
|
410
|
-
*
|
|
664
|
+
* Agent Loop 主流程(口语版)
|
|
411
665
|
*
|
|
412
666
|
* 流程图(文本):
|
|
413
667
|
*
|
|
414
668
|
* 轮次开始
|
|
415
669
|
* │
|
|
416
|
-
* ├─
|
|
417
|
-
*
|
|
418
|
-
*
|
|
419
|
-
* ├─
|
|
420
|
-
*
|
|
670
|
+
* ├─ 先看有没有最新快照
|
|
671
|
+
* │ └─ 没有就先拍一张(可带 expandChildrenRefs)
|
|
672
|
+
* │
|
|
673
|
+
* ├─ 组装本轮上下文消息
|
|
674
|
+
* │ └─ remaining + 上轮任务 + 最新快照 +(必要时)重试提示
|
|
675
|
+
* │
|
|
676
|
+
* ├─ 调用模型拿决策
|
|
677
|
+
* │ └─ 同时解析 `REMAINING` 和 `SNAPSHOT_HINT`
|
|
678
|
+
* │
|
|
679
|
+
* ├─ 有 toolCalls 吗?
|
|
680
|
+
* │ ├─ 没有:走收敛/协议修复判断(必要时等待后重试)
|
|
681
|
+
* │ └─ 有:逐个执行工具
|
|
682
|
+
* │ ├─ 冗余拦截(例如 page_info 空转)
|
|
683
|
+
* │ ├─ 失败恢复(元素未找到重试)
|
|
684
|
+
* │ ├─ 导航后更新快照
|
|
685
|
+
* │ └─ 命中断轮条件则提前结束本轮
|
|
686
|
+
* │
|
|
687
|
+
* ├─ 更新 remaining(优先协议,缺失时启发式剔除)
|
|
688
|
+
* │
|
|
689
|
+
* ├─ 防空转 / 防自转检查
|
|
690
|
+
* │ └─ 连续只读或重复批次会触发停机
|
|
691
|
+
* │
|
|
421
692
|
* ├─ 刷新快照
|
|
422
693
|
* ▼
|
|
423
694
|
* 下一轮或停机
|
|
695
|
+
*
|
|
696
|
+
* 停机条件(任一命中):
|
|
697
|
+
* - `REMAINING: DONE`(或 remaining 为空)
|
|
698
|
+
* - 协议修复后仍无推进
|
|
699
|
+
* - 连续只读(空转)
|
|
700
|
+
* - 重复批次(自转)
|
|
701
|
+
* - 达到 maxRounds
|
|
424
702
|
*/
|
|
425
703
|
/**
|
|
426
|
-
* 执行 Agent
|
|
704
|
+
* 执行 Agent 循环。
|
|
705
|
+
*
|
|
706
|
+
* 你可以把这个函数理解成“任务执行调度器”:
|
|
707
|
+
* - 输入:用户任务、系统提示词、工具注册表、历史消息、初始快照
|
|
708
|
+
* - 过程:按轮次持续执行“看页面 -> 让模型决策 -> 跑工具 -> 更新上下文”
|
|
709
|
+
* - 输出:最终回复、完整工具调用记录、可复用消息、结构化指标
|
|
427
710
|
*
|
|
428
|
-
*
|
|
429
|
-
*
|
|
711
|
+
* 每轮主流程(固定顺序):
|
|
712
|
+
* 1) Ensure Snapshot:确保当前有最新快照(必要时读取)
|
|
713
|
+
* 2) Build Messages:构建紧凑上下文(remaining + 上轮轨迹 + 最新快照)
|
|
714
|
+
* 3) Call AI:请求模型并解析协议字段(`REMAINING` / `SNAPSHOT_HINT`)
|
|
715
|
+
* 4) Execute Tools:执行工具调用并应用保护机制(冗余拦截、恢复、导航刷新)
|
|
716
|
+
* 5) Reduce Remaining:推进剩余任务(优先协议,缺失时启发式剔除)
|
|
717
|
+
* 6) Guard & Refresh:防空转/防自转判定,并刷新快照进入下一轮
|
|
718
|
+
*
|
|
719
|
+
* 核心状态语义:
|
|
720
|
+
* - `remainingInstruction`:当前轮还未消费完的任务文本
|
|
721
|
+
* - `previousRoundTasks`:上一轮已执行动作,防止模型原样重复
|
|
722
|
+
* - `previousRoundPlannedTasks`:上一轮模型计划,用于重复批次检测
|
|
723
|
+
* - `protocolViolationHint`:协议修复提示(remaining 未完成却无工具调用时注入)
|
|
724
|
+
*
|
|
725
|
+
* 停机条件(命中任意一条即结束):
|
|
726
|
+
* - 模型无工具调用且 remaining 已收敛(`REMAINING: DONE` 或空)
|
|
727
|
+
* - 协议修复后仍无推进
|
|
728
|
+
* - 连续只读轮次(防空转)
|
|
729
|
+
* - 连续重复计划批次(防自转)
|
|
730
|
+
* - 达到 `maxRounds`
|
|
430
731
|
*/
|
|
431
732
|
async function executeAgentLoop(params) {
|
|
432
|
-
const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
|
|
733
|
+
const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, roundStabilityWait, callbacks } = params;
|
|
433
734
|
const tools = registry.getDefinitions();
|
|
434
735
|
const allToolCalls = [];
|
|
435
736
|
const fullToolTrace = [];
|
|
@@ -450,6 +751,12 @@ async function executeAgentLoop(params) {
|
|
|
450
751
|
let lastRoundHadError = false;
|
|
451
752
|
let protocolViolationHint;
|
|
452
753
|
const snapshotExpandRefIds = /* @__PURE__ */ new Set();
|
|
754
|
+
const effectiveRoundStabilityWait = {
|
|
755
|
+
enabled: roundStabilityWait?.enabled ?? true,
|
|
756
|
+
timeoutMs: Math.max(200, Math.floor(roundStabilityWait?.timeoutMs ?? DEFAULT_ROUND_STABILITY_WAIT_TIMEOUT_MS)),
|
|
757
|
+
quietMs: Math.max(50, Math.floor(roundStabilityWait?.quietMs ?? DEFAULT_ROUND_STABILITY_WAIT_QUIET_MS)),
|
|
758
|
+
loadingSelectors: [...new Set([...DEFAULT_ROUND_STABILITY_WAIT_LOADING_SELECTORS, ...roundStabilityWait?.loadingSelectors ?? []].map((selector) => selector.trim()).filter(Boolean))]
|
|
759
|
+
};
|
|
453
760
|
let recoveryCount = 0;
|
|
454
761
|
let redundantInterceptCount = 0;
|
|
455
762
|
let pendingNotFoundRetry;
|
|
@@ -457,7 +764,7 @@ async function executeAgentLoop(params) {
|
|
|
457
764
|
let snapshotSizeTotal = 0;
|
|
458
765
|
let snapshotSizeMax = 0;
|
|
459
766
|
/**
|
|
460
|
-
*
|
|
767
|
+
* 记录快照统计。
|
|
461
768
|
*
|
|
462
769
|
* 用于输出可观测指标:读取次数、平均长度、最大长度。
|
|
463
770
|
* Used for observability metrics: read count, avg size, max size.
|
|
@@ -469,7 +776,7 @@ async function executeAgentLoop(params) {
|
|
|
469
776
|
if (snapshot.length > snapshotSizeMax) snapshotSizeMax = snapshot.length;
|
|
470
777
|
};
|
|
471
778
|
/**
|
|
472
|
-
*
|
|
779
|
+
* 刷新页面快照。
|
|
473
780
|
*
|
|
474
781
|
* 只做两件事:读取最新快照 + 更新快照统计。
|
|
475
782
|
* Does exactly two things: read latest snapshot + update metrics.
|
|
@@ -482,33 +789,32 @@ async function executeAgentLoop(params) {
|
|
|
482
789
|
recordSnapshotStats(pageContext.latestSnapshot);
|
|
483
790
|
};
|
|
484
791
|
/**
|
|
485
|
-
*
|
|
792
|
+
* 轮次后稳定等待(双重等待)。
|
|
486
793
|
*
|
|
487
|
-
*
|
|
488
|
-
*
|
|
794
|
+
* 顺序固定为:
|
|
795
|
+
* 1) 等待 loading 指示器隐藏
|
|
796
|
+
* 2) 等待 DOM quiet window
|
|
489
797
|
*/
|
|
490
|
-
const
|
|
491
|
-
if (!
|
|
492
|
-
|
|
493
|
-
const
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
const m = selector.trim().match(/^#([A-Za-z0-9_-]+)$/);
|
|
507
|
-
return m ? m[1] : null;
|
|
798
|
+
const runRoundStabilityBarrier = async () => {
|
|
799
|
+
if (!effectiveRoundStabilityWait.enabled) return;
|
|
800
|
+
if (!registry.has("wait")) return;
|
|
801
|
+
const timeout = effectiveRoundStabilityWait.timeoutMs;
|
|
802
|
+
const loadingSelector = effectiveRoundStabilityWait.loadingSelectors.join(", ");
|
|
803
|
+
if (loadingSelector) await registry.dispatch("wait", {
|
|
804
|
+
action: "wait_for_selector",
|
|
805
|
+
selector: loadingSelector,
|
|
806
|
+
state: "hidden",
|
|
807
|
+
timeout
|
|
808
|
+
});
|
|
809
|
+
await registry.dispatch("wait", {
|
|
810
|
+
action: "wait_for_stable",
|
|
811
|
+
timeout,
|
|
812
|
+
quietMs: effectiveRoundStabilityWait.quietMs
|
|
813
|
+
});
|
|
508
814
|
};
|
|
509
815
|
if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
|
|
510
816
|
/**
|
|
511
|
-
*
|
|
817
|
+
* 追加工具轨迹。
|
|
512
818
|
*
|
|
513
819
|
* 同时写入:
|
|
514
820
|
* - allToolCalls:对外返回结果
|
|
@@ -527,104 +833,6 @@ async function executeAgentLoop(params) {
|
|
|
527
833
|
result
|
|
528
834
|
});
|
|
529
835
|
};
|
|
530
|
-
/**
|
|
531
|
-
* 生成任务数组(中)/ Build normalized task array (EN).
|
|
532
|
-
*
|
|
533
|
-
* 将本轮 toolCalls 归一化成稳定字符串数组,便于:
|
|
534
|
-
* - 回传到下一轮消息上下文(提醒已执行计划)
|
|
535
|
-
* - 进行“是否与上一轮完全相同”的比较
|
|
536
|
-
*/
|
|
537
|
-
const buildTaskArray = (toolCalls) => toolCalls.map((tc) => {
|
|
538
|
-
const inputText = JSON.stringify(tc.input);
|
|
539
|
-
return `${tc.name}:${inputText}`;
|
|
540
|
-
});
|
|
541
|
-
/**
|
|
542
|
-
* 规范化模型文本输出(中)/ Normalize model text for next-round input (EN).
|
|
543
|
-
*
|
|
544
|
-
* 优先保留 REMAINING 行;否则截断首段文本,避免长篇规划污染下一轮输入。
|
|
545
|
-
* Prefer REMAINING line; otherwise keep a short excerpt to avoid long planning spillover.
|
|
546
|
-
*/
|
|
547
|
-
const normalizeModelOutput = (text) => {
|
|
548
|
-
if (!text) return "";
|
|
549
|
-
const trimmed = text.trim();
|
|
550
|
-
if (!trimmed) return "";
|
|
551
|
-
const remainingMatch = trimmed.match(/REMAINING\s*:\s*([\s\S]*)$/i);
|
|
552
|
-
if (remainingMatch) return `REMAINING: ${remainingMatch[1].trim()}`;
|
|
553
|
-
return (trimmed.split(/\n\s*\n/)[0]?.trim() ?? trimmed).slice(0, 220);
|
|
554
|
-
};
|
|
555
|
-
/**
|
|
556
|
-
* 判定动作是否会触发 DOM 结构变化(
|
|
557
|
-
*
|
|
558
|
-
* 触发后应强制断轮,等待下一轮新快照继续。
|
|
559
|
-
*
|
|
560
|
-
*/
|
|
561
|
-
const shouldForceRoundBreak = (toolName, toolInput) => {
|
|
562
|
-
const action = getToolAction(toolInput);
|
|
563
|
-
if (toolName === "navigate") return action === "goto" || action === "back" || action === "forward" || action === "reload";
|
|
564
|
-
if (toolName === "dom") {
|
|
565
|
-
if (action === "press") return (typeof toolInput === "object" && toolInput !== null ? String(toolInput.key ?? toolInput.value ?? "") : "") === "Enter";
|
|
566
|
-
return false;
|
|
567
|
-
}
|
|
568
|
-
if (toolName === "evaluate") return true;
|
|
569
|
-
return false;
|
|
570
|
-
};
|
|
571
|
-
/**
|
|
572
|
-
* 将“找不到元素”的失败任务整理成可重试清单(中)/ Build retry task list for not-found failures (EN).
|
|
573
|
-
*/
|
|
574
|
-
const collectMissingTask = (name, input, result) => {
|
|
575
|
-
if (!isElementNotFoundResult(result)) return null;
|
|
576
|
-
return {
|
|
577
|
-
name,
|
|
578
|
-
input,
|
|
579
|
-
reason: toContentString(result.content).slice(0, 240)
|
|
580
|
-
};
|
|
581
|
-
};
|
|
582
|
-
/**
|
|
583
|
-
* 解析 REMAINING 协议(中)/ Parse REMAINING protocol from model text (EN).
|
|
584
|
-
*
|
|
585
|
-
* 支持:
|
|
586
|
-
* - `REMAINING: <text>` → 继续下一轮消费该剩余文本
|
|
587
|
-
* - `REMAINING: DONE` → 剩余任务为空
|
|
588
|
-
* 返回 null 表示本轮没有提供 REMAINING 标记。
|
|
589
|
-
*/
|
|
590
|
-
const parseRemainingInstruction = (text) => {
|
|
591
|
-
if (!text) return null;
|
|
592
|
-
const match = text.match(/REMAINING\s*:\s*([\s\S]*)$/i);
|
|
593
|
-
if (!match) return null;
|
|
594
|
-
const value = match[1].trim();
|
|
595
|
-
return /^done$/i.test(value) ? "" : value;
|
|
596
|
-
};
|
|
597
|
-
/**
|
|
598
|
-
* 推进下一轮描述(中)/ Derive next-round instruction from model text (EN).
|
|
599
|
-
*
|
|
600
|
-
* 优先 REMAINING 协议;若未提供,则保持当前 remaining 不变。
|
|
601
|
-
* Priority: REMAINING protocol first; otherwise keep current remaining instruction unchanged.
|
|
602
|
-
*/
|
|
603
|
-
const deriveNextInstruction = (text, currentInstruction) => {
|
|
604
|
-
const parsed = parseRemainingInstruction(text);
|
|
605
|
-
if (parsed !== null) return {
|
|
606
|
-
nextInstruction: parsed,
|
|
607
|
-
hasRemainingProtocol: true
|
|
608
|
-
};
|
|
609
|
-
return {
|
|
610
|
-
nextInstruction: currentInstruction,
|
|
611
|
-
hasRemainingProtocol: false
|
|
612
|
-
};
|
|
613
|
-
};
|
|
614
|
-
/**
|
|
615
|
-
* 启发式任务剔除(中)/ Heuristic remaining reduction for linear instructions (EN).
|
|
616
|
-
*
|
|
617
|
-
* 在 REMAINING 缺失但本轮有执行动作时,按“线性片段”剔除已执行步数,避免下一轮继续携带整段原任务。
|
|
618
|
-
* When REMAINING is missing but actions were executed, drop executed step count from a linearized instruction.
|
|
619
|
-
*/
|
|
620
|
-
const reduceRemainingHeuristically = (currentInstruction, executedCount) => {
|
|
621
|
-
if (!currentInstruction.trim() || executedCount <= 0) return currentInstruction;
|
|
622
|
-
const parts = currentInstruction.replace(/\s+/g, " ").replace(/(->|=>|→)/g, " 然后 ").replace(/[,,。;;]/g, " 然后 ").split(/\s*(?:然后|再|并且|并|接着|随后|之后)\s*/g).map((part) => part.trim()).filter(Boolean);
|
|
623
|
-
if (parts.length <= 1) return currentInstruction;
|
|
624
|
-
const nextParts = parts.slice(Math.min(executedCount, parts.length));
|
|
625
|
-
if (nextParts.length === 0) return "";
|
|
626
|
-
return nextParts.join(" -> ");
|
|
627
|
-
};
|
|
628
836
|
for (let round = 0; round < maxRounds; round++) {
|
|
629
837
|
callbacks?.onRound?.(round);
|
|
630
838
|
usedRounds = round + 1;
|
|
@@ -719,6 +927,7 @@ async function executeAgentLoop(params) {
|
|
|
719
927
|
break;
|
|
720
928
|
}
|
|
721
929
|
let roundHasError = false;
|
|
930
|
+
let roundHasPotentialDomMutation = false;
|
|
722
931
|
const executedTaskCalls = [];
|
|
723
932
|
const roundMissingTasks = [];
|
|
724
933
|
for (const tc of response.toolCalls) {
|
|
@@ -749,6 +958,7 @@ async function executeAgentLoop(params) {
|
|
|
749
958
|
const missingTask = collectMissingTask(tc.name, tc.input, result);
|
|
750
959
|
if (missingTask) roundMissingTasks.push(missingTask);
|
|
751
960
|
if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
|
|
961
|
+
if (!hasToolError(result) && isPotentialDomMutation(tc.name, tc.input)) roundHasPotentialDomMutation = true;
|
|
752
962
|
if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
|
|
753
963
|
pageContext.latestSnapshot = toContentString(result.content);
|
|
754
964
|
recordSnapshotStats(pageContext.latestSnapshot);
|
|
@@ -764,7 +974,8 @@ async function executeAgentLoop(params) {
|
|
|
764
974
|
else pendingNotFoundRetry = void 0;
|
|
765
975
|
if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
|
|
766
976
|
else {
|
|
767
|
-
const
|
|
977
|
+
const heuristicProgressUnits = executedTaskCalls.length > 0 ? 1 : 0;
|
|
978
|
+
const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, heuristicProgressUnits);
|
|
768
979
|
if (nextByHeuristic !== remainingInstruction) remainingInstruction = nextByHeuristic;
|
|
769
980
|
else roundHasError = true;
|
|
770
981
|
}
|
|
@@ -782,6 +993,7 @@ async function executeAgentLoop(params) {
|
|
|
782
993
|
break;
|
|
783
994
|
}
|
|
784
995
|
consecutiveReadOnlyRounds = idleResult;
|
|
996
|
+
if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
|
|
785
997
|
await refreshSnapshot();
|
|
786
998
|
}
|
|
787
999
|
const resultMessages = [...history ?? [], {
|
|
@@ -828,7 +1040,9 @@ const PROVIDER_ENDPOINTS = {
|
|
|
828
1040
|
openai: "https://api.openai.com/v1",
|
|
829
1041
|
copilot: "https://models.inference.ai.azure.com",
|
|
830
1042
|
anthropic: "https://api.anthropic.com",
|
|
831
|
-
deepseek: "https://api.deepseek.com"
|
|
1043
|
+
deepseek: "https://api.deepseek.com",
|
|
1044
|
+
doubao: "https://ark.cn-beijing.volces.com/api/v3",
|
|
1045
|
+
qwen: "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
|
832
1046
|
};
|
|
833
1047
|
/** 校验 provider(中)/ Validate provider support (EN). */
|
|
834
1048
|
function validateProvider(provider) {
|
|
@@ -1323,6 +1537,32 @@ async function parseAnthropicStream(response) {
|
|
|
1323
1537
|
*/
|
|
1324
1538
|
var DeepSeekClient = class extends OpenAIClient {};
|
|
1325
1539
|
|
|
1540
|
+
//#endregion
|
|
1541
|
+
//#region src/core/ai-client/doubao.ts
|
|
1542
|
+
/**
|
|
1543
|
+
* Doubao 客户端封装(中)/ Doubao client wrapper (EN).
|
|
1544
|
+
*
|
|
1545
|
+
* Doubao(火山引擎 Ark)与 OpenAI Chat Completions 兼容,直接复用 OpenAIClient。
|
|
1546
|
+
* Doubao (Volcengine Ark) is OpenAI-compatible, so it reuses OpenAIClient behavior.
|
|
1547
|
+
*/
|
|
1548
|
+
/**
|
|
1549
|
+
* Doubao 客户端类(中)/ Doubao client class extending OpenAIClient (EN).
|
|
1550
|
+
*/
|
|
1551
|
+
var DoubaoClient = class extends OpenAIClient {};
|
|
1552
|
+
|
|
1553
|
+
//#endregion
|
|
1554
|
+
//#region src/core/ai-client/qwen.ts
|
|
1555
|
+
/**
|
|
1556
|
+
* Qwen 客户端封装(中)/ Qwen client wrapper (EN).
|
|
1557
|
+
*
|
|
1558
|
+
* Qwen(阿里云百炼兼容模式)与 OpenAI Chat Completions 兼容,直接复用 OpenAIClient。
|
|
1559
|
+
* Qwen (DashScope compatible mode) is OpenAI-compatible, so it reuses OpenAIClient behavior.
|
|
1560
|
+
*/
|
|
1561
|
+
/**
|
|
1562
|
+
* Qwen 客户端类(中)/ Qwen client class extending OpenAIClient (EN).
|
|
1563
|
+
*/
|
|
1564
|
+
var QwenClient = class extends OpenAIClient {};
|
|
1565
|
+
|
|
1326
1566
|
//#endregion
|
|
1327
1567
|
//#region src/core/ai-client/index.ts
|
|
1328
1568
|
/**
|
|
@@ -1333,9 +1573,11 @@ function createAIClient(config) {
|
|
|
1333
1573
|
switch (config.provider) {
|
|
1334
1574
|
case "openai":
|
|
1335
1575
|
case "copilot": return new OpenAIClient(config);
|
|
1576
|
+
case "doubao": return new DoubaoClient(config);
|
|
1577
|
+
case "qwen": return new QwenClient(config);
|
|
1336
1578
|
case "anthropic": return new AnthropicClient(config);
|
|
1337
1579
|
case "deepseek": return new DeepSeekClient(config);
|
|
1338
|
-
default: throw new Error(`Unknown AI provider: ${config.provider}. Supported: openai, copilot, anthropic, deepseek`);
|
|
1580
|
+
default: throw new Error(`Unknown AI provider: ${config.provider}. Supported: openai, copilot, anthropic, deepseek, doubao, qwen`);
|
|
1339
1581
|
}
|
|
1340
1582
|
}
|
|
1341
1583
|
|
|
@@ -1436,12 +1678,16 @@ function buildSystemPrompt(params = {}) {
|
|
|
1436
1678
|
"- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
|
|
1437
1679
|
"- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
|
|
1438
1680
|
"- For dropdown/select, use dom action=select_option (or fill on select).",
|
|
1681
|
+
"- Always cross-check planned actions against the original goal to avoid task drift (e.g., do not confuse create issue vs create repository).",
|
|
1439
1682
|
"- If a required list shows `... (N children omitted)` under a specific container, request focused expansion by outputting `SNAPSHOT_HINT: EXPAND_CHILDREN #<containerRef>`.",
|
|
1440
1683
|
"- After outputting snapshot expansion hint, wait for the next refreshed snapshot before further scrolling/clicking on that list.",
|
|
1441
1684
|
"- Verification whitelist: do NOT use get_text/get_attr to verify input/select values unless the user explicitly asks for verification.",
|
|
1442
1685
|
"- Stop rule: when the requested state is achieved, stop calling tools. If verification is requested, verify once and then return REMAINING: DONE (no repeated get_text/get_attr on the same target).",
|
|
1443
1686
|
"- Do NOT interact with AutoPilot UI unless user explicitly asks.",
|
|
1444
1687
|
"",
|
|
1688
|
+
"## Listener Abbrevs",
|
|
1689
|
+
"clk=click dbl=dblclick mdn=mousedown mup=mouseup mmv=mousemove mov=mouseover mot=mouseout men=mouseenter mlv=mouseleave pdn=pointerdown pup=pointerup pmv=pointermove tst=touchstart ted=touchend kdn=keydown kup=keyup inp=input chg=change sub=submit fcs=focus blr=blur scl=scroll whl=wheel drg=drag drs=dragstart dre=dragend drp=drop ctx=contextmenu",
|
|
1690
|
+
"",
|
|
1445
1691
|
"## Output Contract",
|
|
1446
1692
|
"- Return tool calls for this round.",
|
|
1447
1693
|
"- Also include one plain text line:",
|
|
@@ -1469,25 +1715,89 @@ function buildSystemPrompt(params = {}) {
|
|
|
1469
1715
|
}
|
|
1470
1716
|
|
|
1471
1717
|
//#endregion
|
|
1472
|
-
//#region src/web/
|
|
1718
|
+
//#region src/web/event-listener-tracker.ts
|
|
1719
|
+
const elementEventMap = /* @__PURE__ */ new WeakMap();
|
|
1720
|
+
let installed = false;
|
|
1721
|
+
let originalAddEventListener;
|
|
1722
|
+
let originalRemoveEventListener;
|
|
1723
|
+
function normalizeEventType(type) {
|
|
1724
|
+
if (typeof type !== "string") return null;
|
|
1725
|
+
return type.trim().toLowerCase() || null;
|
|
1726
|
+
}
|
|
1727
|
+
function canTrackElementTarget(target) {
|
|
1728
|
+
if (typeof Element === "undefined") return false;
|
|
1729
|
+
return target instanceof Element;
|
|
1730
|
+
}
|
|
1731
|
+
function trackElementEvent(target, type) {
|
|
1732
|
+
if (!canTrackElementTarget(target)) return;
|
|
1733
|
+
const prev = elementEventMap.get(target);
|
|
1734
|
+
if (prev) {
|
|
1735
|
+
prev.add(type);
|
|
1736
|
+
return;
|
|
1737
|
+
}
|
|
1738
|
+
elementEventMap.set(target, new Set([type]));
|
|
1739
|
+
}
|
|
1740
|
+
function untrackElementEvent(target, type) {
|
|
1741
|
+
if (!canTrackElementTarget(target)) return;
|
|
1742
|
+
const prev = elementEventMap.get(target);
|
|
1743
|
+
if (!prev) return;
|
|
1744
|
+
prev.delete(type);
|
|
1745
|
+
if (prev.size === 0) elementEventMap.delete(target);
|
|
1746
|
+
}
|
|
1473
1747
|
/**
|
|
1474
|
-
*
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1748
|
+
* 安装全局监听追踪补丁(幂等)。
|
|
1749
|
+
*/
|
|
1750
|
+
function installEventListenerTracking() {
|
|
1751
|
+
if (installed) return;
|
|
1752
|
+
if (typeof EventTarget === "undefined") return;
|
|
1753
|
+
const proto = EventTarget.prototype;
|
|
1754
|
+
const nativeAdd = proto.addEventListener;
|
|
1755
|
+
const nativeRemove = proto.removeEventListener;
|
|
1756
|
+
if (typeof nativeAdd !== "function" || typeof nativeRemove !== "function") return;
|
|
1757
|
+
originalAddEventListener = nativeAdd;
|
|
1758
|
+
originalRemoveEventListener = nativeRemove;
|
|
1759
|
+
proto.addEventListener = function patchedAddEventListener(type, listener, options) {
|
|
1760
|
+
originalAddEventListener?.call(this, type, listener, options);
|
|
1761
|
+
try {
|
|
1762
|
+
const normalizedType = normalizeEventType(type);
|
|
1763
|
+
if (!normalizedType || listener == null) return;
|
|
1764
|
+
trackElementEvent(this, normalizedType);
|
|
1765
|
+
} catch {}
|
|
1766
|
+
};
|
|
1767
|
+
proto.removeEventListener = function patchedRemoveEventListener(type, listener, options) {
|
|
1768
|
+
originalRemoveEventListener?.call(this, type, listener, options);
|
|
1769
|
+
try {
|
|
1770
|
+
const normalizedType = normalizeEventType(type);
|
|
1771
|
+
if (!normalizedType || listener == null) return;
|
|
1772
|
+
untrackElementEvent(this, normalizedType);
|
|
1773
|
+
} catch {}
|
|
1774
|
+
};
|
|
1775
|
+
installed = true;
|
|
1776
|
+
}
|
|
1777
|
+
/**
|
|
1778
|
+
* 读取元素已记录的事件名(排序后返回,便于稳定输出)。
|
|
1779
|
+
*/
|
|
1780
|
+
function getTrackedElementEvents(el) {
|
|
1781
|
+
const set = elementEventMap.get(el);
|
|
1782
|
+
if (!set || set.size === 0) return [];
|
|
1783
|
+
return Array.from(set).sort();
|
|
1784
|
+
}
|
|
1785
|
+
/**
|
|
1786
|
+
* 判断元素是否存在至少一个被追踪到的事件绑定。
|
|
1787
|
+
*/
|
|
1788
|
+
function hasTrackedElementEvents(el) {
|
|
1789
|
+
return (elementEventMap.get(el)?.size ?? 0) > 0;
|
|
1790
|
+
}
|
|
1791
|
+
|
|
1792
|
+
//#endregion
|
|
1793
|
+
//#region src/web/tools/dom-tool/constants.ts
|
|
1794
|
+
/**
|
|
1795
|
+
* DOM Tool 常量定义。
|
|
1487
1796
|
*
|
|
1488
|
-
*
|
|
1797
|
+
* 包含:input 类型分类、修饰键集合、键码映射、滚动策略。
|
|
1489
1798
|
*/
|
|
1490
|
-
|
|
1799
|
+
/** 默认等待超时(ms) */
|
|
1800
|
+
const DEFAULT_WAIT_MS = 1200;
|
|
1491
1801
|
/** scrollIntoView 轮换策略(参考 Playwright dom.ts) */
|
|
1492
1802
|
const SCROLL_OPTIONS = [
|
|
1493
1803
|
void 0,
|
|
@@ -1547,6 +1857,9 @@ const KEY_CODE_MAP = {
|
|
|
1547
1857
|
Alt: "AltLeft",
|
|
1548
1858
|
Meta: "MetaLeft"
|
|
1549
1859
|
};
|
|
1860
|
+
|
|
1861
|
+
//#endregion
|
|
1862
|
+
//#region src/web/tools/dom-tool/query.ts
|
|
1550
1863
|
let activeRefStore;
|
|
1551
1864
|
function setActiveRefStore(store) {
|
|
1552
1865
|
activeRefStore = store;
|
|
@@ -1557,15 +1870,26 @@ function getActiveRefStore() {
|
|
|
1557
1870
|
function sleep(ms) {
|
|
1558
1871
|
return new Promise((r) => setTimeout(r, ms));
|
|
1559
1872
|
}
|
|
1560
|
-
/**
|
|
1873
|
+
/**
|
|
1874
|
+
* 查询元素:优先 RefStore hash,回退 CSS 选择器。
|
|
1875
|
+
* 支持复合 hash 选择器(如 "#hashID .child-class")——先解析 hash 根,再在其子树内 querySelector。
|
|
1876
|
+
*/
|
|
1561
1877
|
function queryElement(selector) {
|
|
1562
1878
|
try {
|
|
1563
1879
|
if (selector.startsWith("#") && activeRefStore) {
|
|
1564
|
-
const
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1880
|
+
const spaceIdx = selector.indexOf(" ");
|
|
1881
|
+
const hashPart = spaceIdx > 0 ? selector.slice(1, spaceIdx) : selector.slice(1);
|
|
1882
|
+
const rest = spaceIdx > 0 ? selector.slice(spaceIdx + 1).trim() : "";
|
|
1883
|
+
if (activeRefStore.has(hashPart)) {
|
|
1884
|
+
const root = activeRefStore.get(hashPart);
|
|
1885
|
+
if (!root || !root.isConnected) {
|
|
1886
|
+
activeRefStore.delete(hashPart);
|
|
1887
|
+
return `未找到 ref "#${hashPart}" 对应的元素(可能已被移除或快照已过期)`;
|
|
1888
|
+
}
|
|
1889
|
+
if (!rest) return root;
|
|
1890
|
+
const child = root.querySelector(rest);
|
|
1891
|
+
if (!child) return `在 #${hashPart} 内未找到匹配 "${rest}" 的子元素`;
|
|
1892
|
+
return child;
|
|
1569
1893
|
}
|
|
1570
1894
|
}
|
|
1571
1895
|
const el = document.querySelector(selector);
|
|
@@ -1593,6 +1917,30 @@ function resolveWaitMs(params) {
|
|
|
1593
1917
|
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
1594
1918
|
return DEFAULT_WAIT_MS;
|
|
1595
1919
|
}
|
|
1920
|
+
/** 生成元素的简洁描述字符串,用于工具调用结果的可读输出。 */
|
|
1921
|
+
function describeElement(el) {
|
|
1922
|
+
const tag = el.tagName.toLowerCase();
|
|
1923
|
+
const id = el.id ? `#${el.id}` : "";
|
|
1924
|
+
const cls = el.className && typeof el.className === "string" ? el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3).map((c) => `.${c}`).join("") : "";
|
|
1925
|
+
const text = el instanceof HTMLSelectElement ? el.selectedOptions[0]?.textContent?.trim().slice(0, 40) ?? "" : el.textContent?.trim().slice(0, 40) ?? "";
|
|
1926
|
+
const textHint = text ? ` "${text}"` : "";
|
|
1927
|
+
const hints = [];
|
|
1928
|
+
for (const attr of [
|
|
1929
|
+
"type",
|
|
1930
|
+
"name",
|
|
1931
|
+
"placeholder",
|
|
1932
|
+
"href",
|
|
1933
|
+
"role"
|
|
1934
|
+
]) {
|
|
1935
|
+
const v = el.getAttribute(attr);
|
|
1936
|
+
if (v) hints.push(`${attr}=${v}`);
|
|
1937
|
+
}
|
|
1938
|
+
if (el instanceof HTMLSelectElement && el.value) hints.push(`val=${el.value}`);
|
|
1939
|
+
return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
|
|
1940
|
+
}
|
|
1941
|
+
|
|
1942
|
+
//#endregion
|
|
1943
|
+
//#region src/web/tools/dom-tool/actionability.ts
|
|
1596
1944
|
/** 检查元素样式可见性(处理 checkVisibility / details 折叠 / visibility) */
|
|
1597
1945
|
function isStyleVisible(el, style) {
|
|
1598
1946
|
style = style ?? window.getComputedStyle(el);
|
|
@@ -1673,23 +2021,6 @@ function checkElementStable(el, timeoutMs = 800) {
|
|
|
1673
2021
|
requestAnimationFrame(check);
|
|
1674
2022
|
});
|
|
1675
2023
|
}
|
|
1676
|
-
/**
|
|
1677
|
-
* 将目标重定向到关联的交互控件。
|
|
1678
|
-
* - button-link:非交互元素→最近 button/[role=button]/a/[role=link]
|
|
1679
|
-
* - follow-label:label→control + 非交互→button/[role=button]/[role=checkbox]/[role=radio]
|
|
1680
|
-
*/
|
|
1681
|
-
function retarget(el, mode) {
|
|
1682
|
-
if (mode === "none") return el;
|
|
1683
|
-
if (!el.matches("input, textarea, select") && !el.isContentEditable) if (mode === "button-link") el = el.closest("button, [role=button], a, [role=link]") || el;
|
|
1684
|
-
else el = el.closest("button, [role=button], [role=checkbox], [role=radio]") || el;
|
|
1685
|
-
if (mode === "follow-label") {
|
|
1686
|
-
if (!el.matches("a, input, textarea, button, select, [role=link], [role=button], [role=checkbox], [role=radio]") && !el.isContentEditable) {
|
|
1687
|
-
const label = el.closest("label");
|
|
1688
|
-
if (label?.control) el = label.control;
|
|
1689
|
-
}
|
|
1690
|
-
}
|
|
1691
|
-
return el;
|
|
1692
|
-
}
|
|
1693
2024
|
function scrollIntoViewIfNeeded(el, retry = 0) {
|
|
1694
2025
|
if (retry === 0 && "scrollIntoViewIfNeeded" in el) {
|
|
1695
2026
|
el.scrollIntoViewIfNeeded(true);
|
|
@@ -1711,7 +2042,7 @@ function checkHitTarget(el) {
|
|
|
1711
2042
|
if (topEl === el || el.contains(topEl) || topEl.contains(el)) return null;
|
|
1712
2043
|
const sharedLabel = topEl.closest("label");
|
|
1713
2044
|
if (sharedLabel && sharedLabel.contains(el)) return null;
|
|
1714
|
-
return
|
|
2045
|
+
return `<${topEl.tagName.toLowerCase()}${topEl.id ? `#${topEl.id}` : ""}>`;
|
|
1715
2046
|
}
|
|
1716
2047
|
function ensureActionable(el, action, selector, force) {
|
|
1717
2048
|
if (force) return null;
|
|
@@ -1766,6 +2097,15 @@ function ensureActionable(el, action, selector, force) {
|
|
|
1766
2097
|
};
|
|
1767
2098
|
return null;
|
|
1768
2099
|
}
|
|
2100
|
+
|
|
2101
|
+
//#endregion
|
|
2102
|
+
//#region src/web/tools/dom-tool/events.ts
|
|
2103
|
+
/**
|
|
2104
|
+
* DOM Tool — 事件派发与键盘操作。
|
|
2105
|
+
*
|
|
2106
|
+
* 包含:完整点击事件链、hover 事件链、input/change 派发、
|
|
2107
|
+
* 原生 setter 写入、selectText、组合键 press。
|
|
2108
|
+
*/
|
|
1769
2109
|
function getClickPoint(el) {
|
|
1770
2110
|
const r = el.getBoundingClientRect();
|
|
1771
2111
|
return {
|
|
@@ -1774,7 +2114,7 @@ function getClickPoint(el) {
|
|
|
1774
2114
|
};
|
|
1775
2115
|
}
|
|
1776
2116
|
/**
|
|
1777
|
-
*
|
|
2117
|
+
* 完整点击事件链:
|
|
1778
2118
|
* pointermove → mousemove → (per clickCount) pointerdown → mousedown → focus → pointerup → mouseup → click
|
|
1779
2119
|
*/
|
|
1780
2120
|
function dispatchClickEvents(el, clickCount = 1) {
|
|
@@ -1942,25 +2282,31 @@ function executePress(el, key) {
|
|
|
1942
2282
|
...modState
|
|
1943
2283
|
}));
|
|
1944
2284
|
}
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
1956
|
-
|
|
1957
|
-
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
2285
|
+
|
|
2286
|
+
//#endregion
|
|
2287
|
+
//#region src/web/tools/dom-tool/resolve.ts
|
|
2288
|
+
/**
|
|
2289
|
+
* DOM Tool — 目标解析与归一化。
|
|
2290
|
+
*
|
|
2291
|
+
* 包含:retarget、checkable 目标归一化、pointer action 代理、
|
|
2292
|
+
* 表单项控件重定向、editable 穿透。
|
|
2293
|
+
*/
|
|
2294
|
+
/**
|
|
2295
|
+
* 将目标重定向到关联的交互控件。
|
|
2296
|
+
* - button-link:非交互元素→最近 button/[role=button]/a/[role=link]
|
|
2297
|
+
* - follow-label:label→control + 非交互→button/[role=button]/[role=checkbox]/[role=radio]
|
|
2298
|
+
*/
|
|
2299
|
+
function retarget(el, mode) {
|
|
2300
|
+
if (mode === "none") return el;
|
|
2301
|
+
if (!el.matches("input, textarea, select") && !el.isContentEditable) if (mode === "button-link") el = el.closest("button, [role=button], a, [role=link]") || el;
|
|
2302
|
+
else el = el.closest("button, [role=button], [role=checkbox], [role=radio]") || el;
|
|
2303
|
+
if (mode === "follow-label") {
|
|
2304
|
+
if (!el.matches("a, input, textarea, button, select, [role=link], [role=button], [role=checkbox], [role=radio]") && !el.isContentEditable) {
|
|
2305
|
+
const label = el.closest("label");
|
|
2306
|
+
if (label?.control) el = label.control;
|
|
2307
|
+
}
|
|
1961
2308
|
}
|
|
1962
|
-
|
|
1963
|
-
return `<${tag}${id}${cls}>${textHint}${hints.length > 0 ? ` [${hints.join(", ")}]` : ""}`;
|
|
2309
|
+
return el;
|
|
1964
2310
|
}
|
|
1965
2311
|
function getChecked(el) {
|
|
1966
2312
|
if (el instanceof HTMLInputElement && (el.type === "checkbox" || el.type === "radio")) return el.checked;
|
|
@@ -2021,6 +2367,35 @@ function resolveFormItemControlTarget(el) {
|
|
|
2021
2367
|
if (control && isElementVisible(control)) return control;
|
|
2022
2368
|
return el;
|
|
2023
2369
|
}
|
|
2370
|
+
/**
|
|
2371
|
+
* 穿透包裹容器,查找内部可编辑子元素。
|
|
2372
|
+
* 覆盖 UI 框架常见模式:wrapper div 包裹真实 input/textarea。
|
|
2373
|
+
* 若自身已可编辑则直接返回;否则在子树中搜索第一个可编辑且可见的控件。
|
|
2374
|
+
* 对 role=slider/spinbutton 等 ARIA widget:向上逐级查找最近容器中的关联 input。
|
|
2375
|
+
*/
|
|
2376
|
+
function resolveEditableTarget(el) {
|
|
2377
|
+
if (isEditableElement(el)) return el;
|
|
2378
|
+
const inner = el.querySelector("input:not([type=\"hidden\"]), textarea, select, [contenteditable=\"true\"]");
|
|
2379
|
+
if (inner && isEditableElement(inner) && isElementVisible(inner)) return inner;
|
|
2380
|
+
const role = el.getAttribute("role");
|
|
2381
|
+
if (role === "slider" || role === "spinbutton") {
|
|
2382
|
+
let ancestor = el.parentElement;
|
|
2383
|
+
for (let depth = 0; ancestor && depth < 5; depth++, ancestor = ancestor.parentElement) {
|
|
2384
|
+
const input = ancestor.querySelector("input[type=\"number\"], input[role=\"spinbutton\"], input:not([type=\"hidden\"])");
|
|
2385
|
+
if (input instanceof HTMLInputElement && isEditableElement(input) && isElementVisible(input)) return input;
|
|
2386
|
+
}
|
|
2387
|
+
}
|
|
2388
|
+
return el;
|
|
2389
|
+
}
|
|
2390
|
+
|
|
2391
|
+
//#endregion
|
|
2392
|
+
//#region src/web/tools/dom-tool/dropdown.ts
|
|
2393
|
+
/**
|
|
2394
|
+
* DOM Tool — 自定义下拉增强。
|
|
2395
|
+
*
|
|
2396
|
+
* 包含:全局可见 option 查找、下拉弹出等待。
|
|
2397
|
+
*/
|
|
2398
|
+
/** 在全局可见 option 节点中按文本匹配(精确 → 包含) */
|
|
2024
2399
|
function findVisibleOptionByText(text) {
|
|
2025
2400
|
const target = text.trim().toLowerCase();
|
|
2026
2401
|
if (!target) return null;
|
|
@@ -2041,6 +2416,7 @@ function findVisibleOptionByText(text) {
|
|
|
2041
2416
|
for (const n of visible) if (n.textContent?.trim().toLowerCase().includes(target)) return n;
|
|
2042
2417
|
return null;
|
|
2043
2418
|
}
|
|
2419
|
+
/** 轮询等待下拉弹出层出现 */
|
|
2044
2420
|
async function waitForDropdownPopup(maxWait = 500) {
|
|
2045
2421
|
const start = Date.now();
|
|
2046
2422
|
while (Date.now() - start < maxWait) {
|
|
@@ -2049,22 +2425,33 @@ async function waitForDropdownPopup(maxWait = 500) {
|
|
|
2049
2425
|
await sleep(50);
|
|
2050
2426
|
}
|
|
2051
2427
|
}
|
|
2428
|
+
|
|
2429
|
+
//#endregion
|
|
2430
|
+
//#region src/web/tools/dom-tool/index.ts
|
|
2431
|
+
/**
|
|
2432
|
+
* DOM Tool — 浏览器 DOM 操作工具入口(结合 Playwright 核心交互模式增强)。
|
|
2433
|
+
*
|
|
2434
|
+
* 关键能力:
|
|
2435
|
+
* 1. retarget — 点击时自动重定向到 button/link/label.control
|
|
2436
|
+
* 2. scrollIntoView 多策略 — 4 种 block 对齐轮换,解决 sticky 遮挡
|
|
2437
|
+
* 3. stable 检查 — rAF 逐帧检测元素位置稳定后再操作
|
|
2438
|
+
* 4. hit-target 验证 — elementsFromPoint 检查是否被遮挡
|
|
2439
|
+
* 5. 完整点击事件链 — pointermove→pointerdown→mousedown→pointerup→mouseup→click
|
|
2440
|
+
* 6. check/uncheck 通过 click — 先检查→click 切换→验证状态
|
|
2441
|
+
* 7. press 组合键 — 支持 Control+a, Shift+Enter 等修饰键
|
|
2442
|
+
* 8. fill 分类型 — date/color/range 走 setValue,text 类走 selectAll+原生写入
|
|
2443
|
+
* 9. 自定义下拉增强 — 更广泛的 option 选择器 + 等待弹出
|
|
2444
|
+
* 10. ARIA disabled — 检查祖先链 aria-disabled
|
|
2445
|
+
*
|
|
2446
|
+
* 运行环境:浏览器 Content Script(直接访问 DOM,无 CDP)。
|
|
2447
|
+
*/
|
|
2052
2448
|
function createDomTool() {
|
|
2053
2449
|
return {
|
|
2054
2450
|
name: "dom",
|
|
2055
2451
|
description: [
|
|
2056
2452
|
"Perform DOM operations on the current page.",
|
|
2057
2453
|
"Actions: click, fill, select_option, clear, check, uncheck, type, focus, hover, scroll, press, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
2058
|
-
"
|
|
2059
|
-
"For multiple fields, use alternating pairs in one batch: focus/click A -> fill/type A -> focus/click B -> fill/type B.",
|
|
2060
|
-
"Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector.",
|
|
2061
|
-
"press supports combo keys like 'Control+a', 'Shift+Enter'.",
|
|
2062
|
-
"check/uncheck is done via click — state change is verified after action.",
|
|
2063
|
-
"Ordinal/index rule: treat visual order as 1-based when the instruction says 'the Nth item' (e.g. 4th star = 4th visible icon from left to right), and avoid off-by-one mistakes.",
|
|
2064
|
-
"Disambiguation rule: distinguish descriptive text/labels from actionable options. Do not click nearby label/help text; click the actual interactive option/control item (icon/button/option) that changes state.",
|
|
2065
|
-
"Unknown/complex components: if a container element (e.g. role=slider, rating, custom widget) has multiple child icons/items in the snapshot but you don't know how to operate it directly, try clicking the appropriate child element instead. For example, a rating component with 5 star icon children — click the 4th icon child to set 4 stars. A slider with a runway — clicking the runway at the right position may work. Always prefer interacting with visible children when the parent container doesn't respond to fill/click as expected.",
|
|
2066
|
-
"fill supports role=slider elements: use fill with a numeric value on a role=slider container (rating/slider) to set its value programmatically.",
|
|
2067
|
-
"For wheel/virtualized pickers where target option is not visible yet, use scroll on the picker column first, then click/select the newly visible option. scroll supports steps for repeated scrolling in one call."
|
|
2454
|
+
"fill auto-resolves wrapper → inner input. check/uncheck toggles via click. press supports combos (Control+a). scroll supports steps for repeated scrolling."
|
|
2068
2455
|
].join(" "),
|
|
2069
2456
|
schema: Type.Object({
|
|
2070
2457
|
action: Type.String({ description: "DOM action: click | fill | select_option | clear | check | uncheck | type | focus | hover | scroll | press | get_text | get_attr | set_attr | add_class | remove_class." }),
|
|
@@ -2079,7 +2466,7 @@ function createDomTool() {
|
|
|
2079
2466
|
deltaY: Type.Optional(Type.Number({ description: "Vertical scroll delta for scroll action. Positive = down, negative = up." })),
|
|
2080
2467
|
deltaX: Type.Optional(Type.Number({ description: "Horizontal scroll delta for scroll action." })),
|
|
2081
2468
|
steps: Type.Optional(Type.Number({ description: "Repeat count for scroll action (default 1, max 20)." })),
|
|
2082
|
-
waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default:
|
|
2469
|
+
waitMs: Type.Optional(Type.Number({ description: "Wait timeout in ms before action (default: 1200)." })),
|
|
2083
2470
|
waitSeconds: Type.Optional(Type.Number({ description: "Wait timeout in seconds (fallback for waitMs)." })),
|
|
2084
2471
|
force: Type.Optional(Type.Boolean({ description: "Skip actionability checks (default false)." }))
|
|
2085
2472
|
}),
|
|
@@ -2127,6 +2514,11 @@ function createDomTool() {
|
|
|
2127
2514
|
el = r;
|
|
2128
2515
|
}
|
|
2129
2516
|
if (action === "check" || action === "uncheck") el = resolveCheckableTarget(el);
|
|
2517
|
+
if ([
|
|
2518
|
+
"fill",
|
|
2519
|
+
"type",
|
|
2520
|
+
"clear"
|
|
2521
|
+
].includes(action)) el = resolveEditableTarget(retarget(el, "follow-label"));
|
|
2130
2522
|
const actionabilityTarget = action === "click" || action === "check" || action === "uncheck" ? resolvePointerActionTarget(resolveFormItemControlTarget(el)) : el;
|
|
2131
2523
|
try {
|
|
2132
2524
|
const checkResult = ensureActionable(actionabilityTarget, action, selector, force);
|
|
@@ -2160,7 +2552,7 @@ function createDomTool() {
|
|
|
2160
2552
|
case "fill": {
|
|
2161
2553
|
const value = params.value;
|
|
2162
2554
|
if (value === void 0) return { content: "缺少 value 参数" };
|
|
2163
|
-
const target =
|
|
2555
|
+
const target = el;
|
|
2164
2556
|
if (target instanceof HTMLInputElement) {
|
|
2165
2557
|
const type = target.type.toLowerCase();
|
|
2166
2558
|
if (INPUT_BLOCKED_TYPES.has(type)) return {
|
|
@@ -2303,7 +2695,7 @@ function createDomTool() {
|
|
|
2303
2695
|
return { content: `已选择 ${describeElement(target)}: value="${selected.value}", label="${selected.text.trim()}"` };
|
|
2304
2696
|
}
|
|
2305
2697
|
case "clear": {
|
|
2306
|
-
const target =
|
|
2698
|
+
const target = el;
|
|
2307
2699
|
if (target instanceof HTMLInputElement || target instanceof HTMLTextAreaElement) {
|
|
2308
2700
|
scrollIntoViewIfNeeded(target);
|
|
2309
2701
|
target.focus();
|
|
@@ -2363,7 +2755,7 @@ function createDomTool() {
|
|
|
2363
2755
|
case "type": {
|
|
2364
2756
|
const value = params.value;
|
|
2365
2757
|
if (value === void 0) return { content: "缺少 value 参数" };
|
|
2366
|
-
const target =
|
|
2758
|
+
const target = el;
|
|
2367
2759
|
scrollIntoViewIfNeeded(target);
|
|
2368
2760
|
if (target instanceof HTMLElement) target.focus();
|
|
2369
2761
|
for (const char of value) {
|
|
@@ -2530,6 +2922,50 @@ const MAX_EXPANDED_LIST_CHILDREN = 120;
|
|
|
2530
2922
|
/** 定向放宽 children 的硬上限。 */
|
|
2531
2923
|
const MAX_EXPANDED_CHILDREN_LIMIT = 300;
|
|
2532
2924
|
/**
 * Event name → snapshot abbreviation map.
 *
 * Purpose: greatly reduce the tokens consumed by listeners="..." in the
 * snapshot while keeping the output readable.
 * The abbreviation rules are explained to the model in the system prompt.
 */
const EVENT_ABBREV = {
	click: "clk",
	dblclick: "dbl",
	mousedown: "mdn",
	mouseup: "mup",
	mousemove: "mmv",
	mouseover: "mov",
	mouseout: "mot",
	mouseenter: "men",
	mouseleave: "mlv",
	pointerdown: "pdn",
	pointerup: "pup",
	pointermove: "pmv",
	pointerenter: "pen",
	pointerleave: "plv",
	touchstart: "tst",
	touchend: "ted",
	touchmove: "tmv",
	keydown: "kdn",
	keyup: "kup",
	keypress: "kpr",
	input: "inp",
	change: "chg",
	submit: "sub",
	focus: "fcs",
	blur: "blr",
	scroll: "scl",
	wheel: "whl",
	drag: "drg",
	dragstart: "drs",
	dragend: "dre",
	drop: "drp",
	contextmenu: "ctx",
	resize: "rsz"
};
/**
 * Convert a full event name into its snapshot abbreviation.
 *
 * Names that are not listed in EVENT_ABBREV fall back to their first
 * 3 characters.
 *
 * Only own keys of EVENT_ABBREV are consulted: a plain bracket lookup would
 * walk the prototype chain, so an event named "toString" or "constructor"
 * would yield an inherited function instead of a short string.
 *
 * @param {string} name - Full event name (e.g. "click").
 * @returns {string} The abbreviation for the event name.
 */
function abbrevEvent(name) {
	return Object.hasOwn(EVENT_ABBREV, name) ? EVENT_ABBREV[name] : name.slice(0, 3);
}
|
|
2968
|
+
/**
|
|
2533
2969
|
* 规整快照属性值,避免把长 base64/data URL 原样注入快照。
|
|
2534
2970
|
*/
|
|
2535
2971
|
function sanitizeSnapshotAttrValue(value) {
|
|
@@ -2581,6 +3017,7 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2581
3017
|
const expandChildrenRefSet = new Set((opts.expandChildrenRefs ?? []).map((ref) => ref.trim().replace(/^#/, "")).filter(Boolean));
|
|
2582
3018
|
let emittedNodes = 0;
|
|
2583
3019
|
let truncatedByNodeBudget = false;
|
|
3020
|
+
const emittedRefIds = /* @__PURE__ */ new Set();
|
|
2584
3021
|
const refStore = opts.refStore;
|
|
2585
3022
|
const SKIP_TAGS = new Set([
|
|
2586
3023
|
"SCRIPT",
|
|
@@ -2617,6 +3054,9 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2617
3054
|
"name",
|
|
2618
3055
|
"role",
|
|
2619
3056
|
"aria-label",
|
|
3057
|
+
"aria-valuenow",
|
|
3058
|
+
"aria-valuemin",
|
|
3059
|
+
"aria-valuemax",
|
|
2620
3060
|
"src",
|
|
2621
3061
|
"alt",
|
|
2622
3062
|
"title",
|
|
@@ -2634,6 +3074,25 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2634
3074
|
"LABEL",
|
|
2635
3075
|
"SUMMARY"
|
|
2636
3076
|
]);
|
|
3077
|
+
/**
 * Common interaction events. An element with a tracked listener for any of
 * these is given interactive priority.
 */
const INTERACTIVE_EVENTS = new Set([
	// Mouse / pointer presses
	"click", "dblclick", "mousedown", "mouseup", "pointerdown", "pointerup",
	// Touch
	"touchstart", "touchend",
	// Value edits
	"input", "change",
	// Keyboard
	"keydown", "keyup", "keypress",
	// Form submission and focus changes
	"submit", "focus", "blur"
]);
|
|
2637
3096
|
/** 布尔状态属性 — 只在存在时输出(无值),如 disabled、checked */
|
|
2638
3097
|
const BOOLEAN_ATTRS = [
|
|
2639
3098
|
"disabled",
|
|
@@ -2682,15 +3141,22 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2682
3141
|
if (el.getAttribute("id")) return false;
|
|
2683
3142
|
if (el.getAttribute("role") || el.getAttribute("aria-label")) return false;
|
|
2684
3143
|
for (const attr of Array.from(el.attributes)) if (attr.name.startsWith("on")) return false;
|
|
3144
|
+
if (hasTrackedElementEvents(el)) return false;
|
|
2685
3145
|
if (directText) return false;
|
|
2686
3146
|
return true;
|
|
2687
3147
|
}
|
|
3148
|
+
/**
 * Report whether any event returned by getTrackedElementEvents(el) is a
 * member of INTERACTIVE_EVENTS.
 *
 * @param el - Element to inspect.
 * @returns {boolean} true on the first interactive event found, otherwise false
 *   (including when no events are tracked for the element).
 */
function hasInteractiveTrackedEvents(el) {
	for (const eventName of getTrackedElementEvents(el)) {
		if (INTERACTIVE_EVENTS.has(eventName)) return true;
	}
	return false;
}
|
|
2688
3153
|
/**
 * Decide whether an element counts as interactive.
 *
 * An element qualifies when its tag is in INTERACTIVE_TAGS, when it carries
 * any of the onclick / role / tabindex / aria-label attributes, or when
 * hasInteractiveTrackedEvents(el) reports a match.
 *
 * @param el - Element to classify.
 * @returns {boolean} true when any of the checks above matches.
 */
function isInteractiveElement(el) {
	const markerAttributes = ["onclick", "role", "tabindex", "aria-label"];
	return INTERACTIVE_TAGS.has(el.tagName)
		|| markerAttributes.some((attrName) => el.hasAttribute(attrName))
		|| Boolean(hasInteractiveTrackedEvents(el));
}
|
|
2696
3162
|
/** 判断是否为“选项列表”容器(时间/下拉/listbox 等)。 */
|
|
@@ -2751,6 +3217,12 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2751
3217
|
if (!attrs.includes("readonly")) attrs.push("readonly");
|
|
2752
3218
|
}
|
|
2753
3219
|
if (el.hasAttribute("onclick")) attrs.push("onclick");
|
|
3220
|
+
const trackedEvents = getTrackedElementEvents(el);
|
|
3221
|
+
if (trackedEvents.length > 0) {
|
|
3222
|
+
const preview = trackedEvents.slice(0, 6).map(abbrevEvent).join(",");
|
|
3223
|
+
const suffix = trackedEvents.length > 6 ? ",..." : "";
|
|
3224
|
+
attrs.push(`listeners="${preview}${suffix}"`);
|
|
3225
|
+
}
|
|
2754
3226
|
const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
|
|
2755
3227
|
if (testId) {
|
|
2756
3228
|
const safeTestId = sanitizeSnapshotAttrValue(testId).slice(0, 25);
|
|
@@ -2800,8 +3272,10 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2800
3272
|
let line = `${indent}[${tag}]`;
|
|
2801
3273
|
if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
|
|
2802
3274
|
if (attrs.length) line += ` ${attrs.join(" ")}`;
|
|
2803
|
-
if (hashId)
|
|
2804
|
-
|
|
3275
|
+
if (hashId) {
|
|
3276
|
+
line += ` #${hashId}`;
|
|
3277
|
+
emittedRefIds.add(hashId);
|
|
3278
|
+
} else line += ` ref="${currentPath}"`;
|
|
2805
3279
|
const lines = [line];
|
|
2806
3280
|
emittedNodes++;
|
|
2807
3281
|
const allChildren = Array.from(el.children);
|
|
@@ -2819,6 +3293,7 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
2819
3293
|
return lines.join("\n");
|
|
2820
3294
|
}
|
|
2821
3295
|
const output = walk(root, 0, "") || "(空页面)";
|
|
3296
|
+
refStore?.prune(emittedRefIds);
|
|
2822
3297
|
if (!truncatedByNodeBudget) return output;
|
|
2823
3298
|
return `${output}\n... (snapshot truncated: maxNodes=${maxNodes})`;
|
|
2824
3299
|
}
|
|
@@ -3046,7 +3521,7 @@ function createNavigateTool() {
|
|
|
3046
3521
|
* - hash selector(如 #abc123)优先通过 RefStore 解析。
|
|
3047
3522
|
* - 可见性语义与 dom-tool 保持一致(参考 Playwright 风格)。
|
|
3048
3523
|
*/
|
|
3049
|
-
const DEFAULT_TIMEOUT =
|
|
3524
|
+
const DEFAULT_TIMEOUT = 6e3;
|
|
3050
3525
|
const POLL_INTERVAL_MS = 80;
|
|
3051
3526
|
const STABLE_TICK_MS = 50;
|
|
3052
3527
|
const OBSERVER_OPTIONS = {
|
|
@@ -3100,7 +3575,14 @@ function resolveSelector(selector) {
|
|
|
3100
3575
|
const store = getActiveRefStore();
|
|
3101
3576
|
if (store) {
|
|
3102
3577
|
const id = selector.slice(1);
|
|
3103
|
-
if (store.has(id))
|
|
3578
|
+
if (store.has(id)) {
|
|
3579
|
+
const el = store.get(id);
|
|
3580
|
+
if (!el || !el.isConnected) {
|
|
3581
|
+
store.delete(id);
|
|
3582
|
+
return null;
|
|
3583
|
+
}
|
|
3584
|
+
return el;
|
|
3585
|
+
}
|
|
3104
3586
|
}
|
|
3105
3587
|
}
|
|
3106
3588
|
try {
|
|
@@ -3238,7 +3720,7 @@ function createWaitTool() {
|
|
|
3238
3720
|
selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
|
|
3239
3721
|
state: Type.Optional(Type.String({ description: "Selector state for wait_for_selector: attached | visible | hidden | detached (default: attached)" })),
|
|
3240
3722
|
text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
|
|
3241
|
-
timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default:
|
|
3723
|
+
timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 6000)" })),
|
|
3242
3724
|
quietMs: Type.Optional(Type.Number({ description: "Quiet window for wait_for_stable in milliseconds (default: 300)" }))
|
|
3243
3725
|
}),
|
|
3244
3726
|
execute: async (params) => {
|
|
@@ -3444,6 +3926,29 @@ var RefStore = class {
|
|
|
3444
3926
|
has(id) {
|
|
3445
3927
|
return this.map.has(id);
|
|
3446
3928
|
}
|
|
3929
|
+
/** 删除指定 hash ID 映射,返回是否删除成功。 */
|
|
3930
|
+
delete(id) {
|
|
3931
|
+
return this.map.delete(id);
|
|
3932
|
+
}
|
|
3933
|
+
/**
|
|
3934
|
+
* 清理失效引用:
|
|
3935
|
+
* - 仅保留 keepIds 中的映射(若提供)
|
|
3936
|
+
* - 自动移除已脱离文档(isConnected=false)的元素
|
|
3937
|
+
*
|
|
3938
|
+
* @returns 被移除的映射数量
|
|
3939
|
+
*/
|
|
3940
|
+
prune(keepIds) {
|
|
3941
|
+
let removed = 0;
|
|
3942
|
+
for (const [id, el] of this.map.entries()) {
|
|
3943
|
+
const shouldKeepById = keepIds ? keepIds.has(id) : true;
|
|
3944
|
+
const isConnected = el.isConnected;
|
|
3945
|
+
if (!shouldKeepById || !isConnected) {
|
|
3946
|
+
this.map.delete(id);
|
|
3947
|
+
removed++;
|
|
3948
|
+
}
|
|
3949
|
+
}
|
|
3950
|
+
return removed;
|
|
3951
|
+
}
|
|
3447
3952
|
/** 清空所有映射 */
|
|
3448
3953
|
clear() {
|
|
3449
3954
|
this.map.clear();
|
|
@@ -3575,6 +4080,7 @@ function registerToolHandler(executors) {
|
|
|
3575
4080
|
* │ └──────────┘ └────────────┘ └──────────────┘ │
|
|
3576
4081
|
* └──────────────────────────────────────────────────┘
|
|
3577
4082
|
*/
|
|
4083
|
+
installEventListenerTracking();
|
|
3578
4084
|
var WebAgent = class WebAgent {
|
|
3579
4085
|
/** 默认系统提示词 key(兼容旧版 setSystemPrompt(prompt))。 */
|
|
3580
4086
|
static DEFAULT_SYSTEM_PROMPT_KEY = "default";
|
|
@@ -3607,6 +4113,8 @@ var WebAgent = class WebAgent {
|
|
|
3607
4113
|
autoSnapshot;
|
|
3608
4114
|
/** 快照选项 */
|
|
3609
4115
|
snapshotOptions;
|
|
4116
|
+
/** 轮次后稳定等待配置 */
|
|
4117
|
+
roundStabilityWait;
|
|
3610
4118
|
/** 工具注册表实例 — 每个 WebAgent 拥有独立的工具集 */
|
|
3611
4119
|
registry = new ToolRegistry();
|
|
3612
4120
|
/** 事件回调 — 绑定后可实时获取 Agent 进度,用于 UI 展示 */
|
|
@@ -3623,6 +4131,7 @@ var WebAgent = class WebAgent {
|
|
|
3623
4131
|
this.memory = options.memory ?? false;
|
|
3624
4132
|
this.autoSnapshot = options.autoSnapshot ?? true;
|
|
3625
4133
|
this.snapshotOptions = options.snapshotOptions ?? {};
|
|
4134
|
+
this.roundStabilityWait = options.roundStabilityWait;
|
|
3626
4135
|
if (typeof options.systemPrompt === "string") this.setSystemPrompt(options.systemPrompt);
|
|
3627
4136
|
else if (options.systemPrompt && typeof options.systemPrompt === "object") this.setSystemPrompts(options.systemPrompt);
|
|
3628
4137
|
}
|
|
@@ -3815,6 +4324,7 @@ var WebAgent = class WebAgent {
|
|
|
3815
4324
|
history: this.memory ? this.history : void 0,
|
|
3816
4325
|
dryRun: this.dryRun,
|
|
3817
4326
|
maxRounds: this.maxRounds,
|
|
4327
|
+
roundStabilityWait: this.roundStabilityWait,
|
|
3818
4328
|
callbacks: wrappedCallbacks
|
|
3819
4329
|
});
|
|
3820
4330
|
if (this.memory) this.history = result.messages;
|