agentpage 0.0.13 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +190 -8
- package/dist/index.d.mts +36 -10
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1006 -402
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -6,9 +6,11 @@ import { Type } from "@sinclair/typebox";
|
|
|
6
6
|
*
|
|
7
7
|
* 统一集中在该文件,避免在主循环中散落“魔法数字”。
|
|
8
8
|
*/
|
|
9
|
-
const DEFAULT_MAX_ROUNDS =
|
|
9
|
+
/** Default cap on agent-loop rounds when the caller does not pass `maxRounds`. */
const DEFAULT_MAX_ROUNDS = 40;
/** Fallback wait, in milliseconds, used when a tool input carries no usable wait hint. */
const DEFAULT_RECOVERY_WAIT_MS = 100;
/** Maximum automatic recovery attempts per tool call after an element-not-found failure. */
const DEFAULT_ACTION_RECOVERY_ROUNDS = 2;
/** Maximum retry attempts granted while previously missing targets are still not found. */
const DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2;
/** Wait, in milliseconds, before each not-found retry. */
const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2000;
|
|
12
14
|
/** 快照起始标记 — 用于在消息中识别快照边界 */
|
|
13
15
|
const SNAPSHOT_START = "<!-- SNAPSHOT_START -->";
|
|
14
16
|
/** 快照结束标记 */
|
|
@@ -18,15 +20,15 @@ const SNAPSHOT_OUTDATED = "[此快照已过期,请参考对话中最新的快
|
|
|
18
20
|
|
|
19
21
|
//#endregion
|
|
20
22
|
//#region src/core/agent-loop/helpers.ts
|
|
21
|
-
/**
|
|
23
|
+
/**
 * Pause asynchronously for the given duration.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep$1(ms) {
	return new Promise(function schedule(done) {
		setTimeout(done, ms);
	});
}
|
|
25
|
-
/**
|
|
27
|
+
/**
 * Normalize tool content to a string.
 *
 * Strings are returned untouched; any other value is pretty-printed as JSON
 * with a 2-space indent.
 *
 * `JSON.stringify` returns `undefined` for `undefined`, functions and symbols,
 * and throws on circular structures and BigInt values. Callers in this module
 * invoke string methods (`includes`, `split`, `replace`) on the result right
 * away, so those cases fall back to a plain string instead of leaking
 * `undefined` or an exception.
 *
 * @param {unknown} content - Tool result content.
 * @returns {string} Always a string; `""` when `content` is `undefined`.
 */
function toContentString(content) {
	if (typeof content === "string") return content;
	if (content === void 0) return "";
	try {
		// `??` covers functions and symbols, which JSON.stringify maps to undefined.
		return JSON.stringify(content, null, 2) ?? String(content);
	} catch {
		// Circular objects / BigInt: use a conversion that cannot throw for objects
		// (String() would throw on null-prototype objects).
		return typeof content === "object" ? Object.prototype.toString.call(content) : String(content);
	}
}
|
|
29
|
-
/**
|
|
31
|
+
/** 元素不存在判定(中)/ Detect element-not-found failure (EN). */
|
|
30
32
|
function isElementNotFoundResult(result) {
|
|
31
33
|
const details = result.details;
|
|
32
34
|
if (details && typeof details === "object") {
|
|
@@ -35,15 +37,14 @@ function isElementNotFoundResult(result) {
|
|
|
35
37
|
const content = toContentString(result.content);
|
|
36
38
|
return content.includes("未找到") && content.includes("元素");
|
|
37
39
|
}
|
|
38
|
-
/**
|
|
40
|
+
/**
 * Build a stable identity key for a tool call.
 *
 * The key is `<name>:<canonical JSON of input>`. Object properties are
 * serialized in sorted order, so two inputs that differ only in property
 * insertion order yield the same key. Array order is preserved because it is
 * significant. Values exposing `toJSON` (e.g. `Date`) are left to
 * `JSON.stringify` unchanged.
 *
 * @param {string} name - Tool name.
 * @param {unknown} input - Tool input payload (expected to be JSON-like).
 * @returns {string} Key suitable for use as a Map key.
 */
function buildToolCallKey(name, input) {
	const canonicalize = (value) => {
		if (Array.isArray(value)) return value.map(canonicalize);
		if (!value || typeof value !== "object" || typeof value.toJSON === "function") return value;
		// Object.fromEntries defines own data properties, so a "__proto__" key
		// cannot alter the prototype of the rebuilt object.
		return Object.fromEntries(Object.keys(value).sort().map((key) => [key, canonicalize(value[key])]));
	};
	return `${name}:${JSON.stringify(canonicalize(input))}`;
}
|
|
42
44
|
/**
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* - 最后回退默认值
|
|
45
|
+
* 解析恢复等待时长(中)/ Resolve recovery wait duration (EN).
|
|
46
|
+
* 优先级:waitMs > waitSeconds > 默认值。
|
|
47
|
+
* Priority: waitMs > waitSeconds > default value.
|
|
47
48
|
*/
|
|
48
49
|
function resolveRecoveryWaitMs(input) {
|
|
49
50
|
if (!input || typeof input !== "object") return DEFAULT_RECOVERY_WAIT_MS;
|
|
@@ -54,74 +55,93 @@ function resolveRecoveryWaitMs(input) {
|
|
|
54
55
|
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
55
56
|
return DEFAULT_RECOVERY_WAIT_MS;
|
|
56
57
|
}
|
|
57
|
-
/**
|
|
58
|
-
function formatToolInputBrief(input) {
|
|
59
|
-
if (!input || typeof input !== "object") return "";
|
|
60
|
-
const params = input;
|
|
61
|
-
const parts = [];
|
|
62
|
-
for (const key of [
|
|
63
|
-
"action",
|
|
64
|
-
"selector",
|
|
65
|
-
"waitMs",
|
|
66
|
-
"waitSeconds",
|
|
67
|
-
"url",
|
|
68
|
-
"text"
|
|
69
|
-
]) {
|
|
70
|
-
const value = params[key];
|
|
71
|
-
if (value === void 0 || value === null) continue;
|
|
72
|
-
if (typeof value === "string") parts.push(`${key}=${JSON.stringify(value).slice(0, 80)}`);
|
|
73
|
-
else if (typeof value === "number" || typeof value === "boolean") parts.push(`${key}=${String(value)}`);
|
|
74
|
-
}
|
|
75
|
-
if (parts.length === 0) return "";
|
|
76
|
-
return ` (${parts.join(", ")})`;
|
|
77
|
-
}
|
|
78
|
-
/** 从工具参数中读取 action。 */
|
|
58
|
+
/**
 * Read the `action` field from a tool input.
 *
 * @param {unknown} input - Tool input payload.
 * @returns {string | undefined} The action when `input` is an object whose
 *   `action` property is a string; otherwise `undefined`.
 */
function getToolAction(input) {
	const isObjectInput = Boolean(input) && typeof input === "object";
	if (!isObjectInput) return void 0;
	const { action } = input;
	if (typeof action !== "string") return void 0;
	return action;
}
|
|
84
|
-
/**
|
|
64
|
+
/**
 * Tell whether a tool result is flagged as an error.
 *
 * @param {{ details?: unknown }} result - Tool result; must not be null/undefined.
 * @returns {boolean} `true` only when `result.details` is an object with a
 *   truthy `error` property.
 */
function hasToolError(result) {
	const details = result.details;
	if (!details || typeof details !== "object") return false;
	return Boolean(details.error);
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
68
|
+
|
|
69
|
+
//#endregion
|
|
70
|
+
//#region src/core/agent-loop/snapshot.ts
|
|
71
|
+
/**
 * Read the current page snapshot through the tool registry.
 *
 * Dispatches `page_info` with `action: "snapshot"` and returns the result's
 * `content` as a string. Each option falls back to its default when it is
 * `null`/`undefined`; `viewportOnly` defaults to `false`.
 *
 * @param {{ dispatch: Function }} registry - Registry whose `dispatch(name, input)` result is awaited.
 * @param {object} [options] - Optional overrides with defaults: `maxDepth` 8,
 *   `viewportOnly` false, `pruneLayout` true, `maxNodes` 500, `maxChildren` 30,
 *   `maxTextLength` 40.
 * @returns {Promise<string>} Snapshot content normalized by `toContentString`.
 */
async function readPageSnapshot(registry, options) {
	const request = {
		action: "snapshot",
		maxDepth: options?.maxDepth ?? 8,
		viewportOnly: options?.viewportOnly ?? false,
		pruneLayout: options?.pruneLayout ?? true,
		maxNodes: options?.maxNodes ?? 500,
		maxChildren: options?.maxChildren ?? 30,
		maxTextLength: options?.maxTextLength ?? 40
	};
	const response = await registry.dispatch("page_info", request);
	return toContentString(response.content);
}
|
|
100
|
-
/**
|
|
88
|
+
/**
 * Surround a snapshot with the start and end boundary markers.
 *
 * @param {string} snapshot - Snapshot text.
 * @returns {string} `SNAPSHOT_START`, the snapshot and `SNAPSHOT_END`, each on its own line.
 */
function wrapSnapshot(snapshot) {
	// Stringify first so non-string values render exactly as template interpolation would.
	const body = `${snapshot}`;
	return [SNAPSHOT_START, body, SNAPSHOT_END].join("\n");
}
|
|
92
|
+
/**
 * Escape regular-expression metacharacters so text can be embedded literally
 * in a `RegExp` source.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegex(str) {
	const specialChars = /[.*+?^${}()|[\]\\]/g;
	return str.replace(specialChars, (ch) => `\\${ch}`);
}
|
|
104
|
-
/**
|
|
96
|
+
/**
 * Global, non-greedy pattern matching each snapshot block: the start marker,
 * any characters (newlines included), and the nearest end marker.
 */
const SNAPSHOT_REGEX = new RegExp([escapeRegex(SNAPSHOT_START), "[\\s\\S]*?", escapeRegex(SNAPSHOT_END)].join(""), "g");
|
|
106
|
-
/**
|
|
107
|
-
function wrapSnapshot(snapshot) {
|
|
108
|
-
return `${SNAPSHOT_START}\n${snapshot}\n${SNAPSHOT_END}`;
|
|
109
|
-
}
|
|
110
|
-
/** 检测文本中是否包含快照标记。 */
|
|
98
|
+
/**
 * Check whether text carries a snapshot block.
 *
 * Only the start marker is looked for; the end marker is not checked.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when `SNAPSHOT_START` occurs in `text`.
 */
function containsSnapshot(text) {
	return text.indexOf(SNAPSHOT_START) !== -1;
}
|
|
114
102
|
/**
 * Replace every snapshot block embedded in a system prompt with the
 * `SNAPSHOT_OUTDATED` notice.
 *
 * @param {string} prompt - System prompt text.
 * @returns {string} The prompt unchanged when it has no start marker; otherwise
 *   the prompt with each complete snapshot block substituted.
 */
function stripSnapshotFromPrompt(prompt) {
	return containsSnapshot(prompt) ? prompt.replace(SNAPSHOT_REGEX, SNAPSHOT_OUTDATED) : prompt;
}
|
|
109
|
+
|
|
110
|
+
//#endregion
|
|
111
|
+
//#region src/core/agent-loop/messages.ts
|
|
122
112
|
/**
 * Detect whether the user explicitly asks to operate the agent's own chat UI.
 *
 * The message must contain both a UI keyword (chat, dock, send button, ...) and
 * an action verb (click, type, send, ...), in English or Chinese. English
 * patterns run on the lowercased message; Chinese patterns run on a copy with
 * whitespace, punctuation and symbols removed.
 *
 * NOTE(review): matching is by substring, so e.g. "dock" also matches
 * "docker", and the Chinese keyword "发送" appears in both lists — confirm
 * this breadth is intended.
 *
 * @param {string} userMessage - Raw user message.
 * @returns {boolean} `true` when both a UI keyword and an action verb are present.
 */
function isExplicitAgentUiRequest(userMessage) {
	const lower = userMessage.toLowerCase();
	const compact = lower.replace(/[\s\p{P}\p{S}]+/gu, "");
	const mentionsAgentUi = /(chat|dock|chatinput|sendbutton|shortcut|quicktest)/i.test(lower) || /(聊天|对话|指令输入框|消息输入框|输入框|发送按钮|发送|快捷测试|测试按钮|聊天面板)/.test(compact);
	if (!mentionsAgentUi) return false;
	return /(press|click|type|fill|send|input|submit|enter)/i.test(lower) || /(输入|点击|发送|填写|填入|操作|提交|回车|按下)/.test(compact);
}
|
|
122
|
+
/**
 * Build a short, human-readable summary of a tool input.
 *
 * Only the keys `action`, `selector`, `waitMs`, `waitSeconds`, `url` and `text`
 * are reported, in that order. Strings are JSON-quoted and limited to 80
 * characters; numbers and booleans are printed as-is; other value types and
 * `null`/`undefined` are skipped.
 *
 * Long strings are shortened *before* quoting and end with `…`, so the result
 * is always a well-formed quoted string (cutting the already-quoted text could
 * drop the closing quote or split an escape sequence / surrogate pair).
 *
 * @param {unknown} input - Tool input payload.
 * @returns {string} `" (key=value, ...)"`, or `""` when there is nothing to report.
 */
function formatToolInputBrief(input) {
	if (!input || typeof input !== "object") return "";
	const MAX_QUOTED_LENGTH = 80;
	const REPORTED_KEYS = ["action", "selector", "waitMs", "waitSeconds", "url", "text"];
	/** JSON-quote `text`, shortening it so the quoted form stays within the limit. */
	const quoteWithinLimit = (text) => {
		const quoted = JSON.stringify(text);
		if (quoted.length <= MAX_QUOTED_LENGTH) return quoted;
		// Reserve room for the two quotes and the ellipsis.
		const budget = MAX_QUOTED_LENGTH - 3;
		let kept = "";
		let used = 0;
		// Iterate by code point and charge each one its escaped width.
		for (const ch of text) {
			const cost = JSON.stringify(ch).length - 2;
			if (used + cost > budget) break;
			kept += ch;
			used += cost;
		}
		return JSON.stringify(`${kept}…`);
	};
	const parts = [];
	for (const key of REPORTED_KEYS) {
		const value = input[key];
		if (value === void 0 || value === null) continue;
		if (typeof value === "string") parts.push(`${key}=${quoteWithinLimit(value)}`);
		else if (typeof value === "number" || typeof value === "boolean") parts.push(`${key}=${String(value)}`);
	}
	if (parts.length === 0) return "";
	return ` (${parts.join(", ")})`;
}
|
|
143
|
+
/**
|
|
144
|
+
* 结果摘要(中)/ Build one-line summary for tool result (EN).
|
|
125
145
|
*/
|
|
126
146
|
function formatToolResultBrief(result) {
|
|
127
147
|
const firstLine = toContentString(result.content).split("\n").find((l) => l.trim())?.trim().slice(0, 80) ?? "";
|
|
@@ -132,25 +152,37 @@ function formatToolResultBrief(result) {
|
|
|
132
152
|
return `✓ ${firstLine}`;
|
|
133
153
|
}
|
|
134
154
|
/**
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
* 核心思路:保留用户原始消息与 system prompt 不变,
|
|
138
|
-
* 只将循环中产出的 assistant(含 toolCalls)+ tool(结果)消息对
|
|
139
|
-
* 压缩为一条 assistant 摘要 + 一条 user 上下文。
|
|
155
|
+
* 构建紧凑消息数组(中)/ Build compact AI message array (EN).
|
|
140
156
|
*
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
* - 后续:[...history, { user: 原始消息 }, { assistant: 工具执行摘要 }, { user: 当前状态+快照 }]
|
|
157
|
+
* Round 0: task + snapshot.
|
|
158
|
+
* Round 1+: master goal + done steps + execution context + latest snapshot.
|
|
144
159
|
*
|
|
145
|
-
*
|
|
160
|
+
* 新增渐进式语义(中)/ Progressive semantics (EN):
|
|
161
|
+
* - `remainingInstruction`:当前轮次仍待执行的文本。
|
|
162
|
+
* - `previousRoundTasks`:上一轮已执行的任务数组,避免重复计划。
|
|
163
|
+
* - 消息中要求模型输出 `REMAINING: ...` 或 `REMAINING: DONE`,供下一轮继续消费。
|
|
146
164
|
*/
|
|
147
|
-
function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history) {
|
|
165
|
+
function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint) {
|
|
148
166
|
const messages = history ? [...history] : [];
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
167
|
+
const allowAgentUiInteraction = isExplicitAgentUiRequest(userMessage);
|
|
168
|
+
const activeInstruction = remainingInstruction && remainingInstruction.trim() ? remainingInstruction.trim() : userMessage;
|
|
169
|
+
if (trace.length === 0) {
|
|
170
|
+
const parts = [
|
|
171
|
+
userMessage,
|
|
172
|
+
"",
|
|
173
|
+
"## Progressive execution state",
|
|
174
|
+
"Current remaining instruction to execute this round:",
|
|
175
|
+
activeInstruction
|
|
176
|
+
];
|
|
177
|
+
if (currentUrl) parts.push("", `URL: ${currentUrl}`);
|
|
178
|
+
if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
|
|
179
|
+
if (protocolViolationHint) parts.push("", protocolViolationHint);
|
|
180
|
+
messages.push({
|
|
181
|
+
role: "user",
|
|
182
|
+
content: parts.join("\n")
|
|
183
|
+
});
|
|
184
|
+
return messages;
|
|
185
|
+
}
|
|
154
186
|
const traceParts = [];
|
|
155
187
|
for (let i = 0; i < trace.length; i++) {
|
|
156
188
|
const entry = trace[i];
|
|
@@ -158,23 +190,43 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
158
190
|
const brief = formatToolResultBrief(entry.result);
|
|
159
191
|
const status = isError ? "❌" : "✅";
|
|
160
192
|
const marker = entry.marker ? ` ${entry.marker}` : "";
|
|
161
|
-
traceParts.push(`${status}
|
|
193
|
+
traceParts.push(`${status} ${i + 1}. ${entry.name}${formatToolInputBrief(entry.input)} → ${brief}${marker}`);
|
|
162
194
|
}
|
|
163
195
|
messages.push({
|
|
164
196
|
role: "assistant",
|
|
165
|
-
content:
|
|
197
|
+
content: `Done steps (do NOT repeat):\n${traceParts.join("\n")}`
|
|
166
198
|
});
|
|
167
|
-
const
|
|
199
|
+
const hasErrors = trace.some((e) => hasToolError(e.result));
|
|
200
|
+
const contextParts = [
|
|
201
|
+
"## Execution context",
|
|
202
|
+
"Current remaining instruction:",
|
|
203
|
+
activeInstruction,
|
|
204
|
+
"",
|
|
205
|
+
"Task-reduction model:",
|
|
206
|
+
"Input: current remaining instruction + previous round executed actions + this-round actions.",
|
|
207
|
+
"Output: new remaining instruction after removing this-round actions.",
|
|
208
|
+
"Start from visible page state directly. Do NOT restate task. Do NOT output planning text.",
|
|
209
|
+
"Execute all independent visible sub-tasks in one round.",
|
|
210
|
+
"Do NOT act on elements not present in this snapshot yet.",
|
|
211
|
+
"If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
|
|
212
|
+
"Do NOT call page_info (get_url/get_title/query_all/snapshot).",
|
|
213
|
+
"For dropdown/select fields, use dom with action=select_option (or fill on a select).",
|
|
214
|
+
allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content."
|
|
215
|
+
];
|
|
216
|
+
if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
|
|
217
|
+
else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
|
|
218
|
+
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
219
|
+
if (previousRoundPlannedTasks && previousRoundPlannedTasks.length > 0) contextParts.push("", "Previous round model planned task array (before execution):", ...previousRoundPlannedTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
220
|
+
if (previousRoundModelOutput) contextParts.push("", "Previous round model output (normalized, for task reduction input):", previousRoundModelOutput);
|
|
221
|
+
contextParts.push("", "After this round, include one plain text line:", "REMAINING: <new remaining instruction after this-round actions>", "or REMAINING: DONE");
|
|
168
222
|
const lastEntry = trace[trace.length - 1];
|
|
169
223
|
if (hasToolError(lastEntry.result)) {
|
|
170
224
|
const stripped = toContentString(lastEntry.result.content).replace(SNAPSHOT_REGEX, "").trim();
|
|
171
|
-
if (stripped && stripped.length <
|
|
172
|
-
contextParts.push("", "### 最近失败操作详情", stripped);
|
|
173
|
-
contextParts.push("请换一种方式完成该步骤,或跳过该步骤继续后续操作。");
|
|
174
|
-
}
|
|
225
|
+
if (stripped && stripped.length < 300) contextParts.push("", "Last error: " + stripped);
|
|
175
226
|
}
|
|
176
|
-
if (currentUrl) contextParts.push("",
|
|
177
|
-
if (
|
|
227
|
+
if (currentUrl) contextParts.push("", `URL: ${currentUrl}`);
|
|
228
|
+
if (protocolViolationHint) contextParts.push("", protocolViolationHint);
|
|
229
|
+
if (latestSnapshot) contextParts.push("", "## Latest DOM snapshot", "Use hash IDs from this snapshot. Do NOT call page_info — this is already the latest.", wrapSnapshot(latestSnapshot));
|
|
178
230
|
messages.push({
|
|
179
231
|
role: "user",
|
|
180
232
|
content: contextParts.join("\n")
|
|
@@ -182,39 +234,388 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
182
234
|
return messages;
|
|
183
235
|
}
|
|
184
236
|
|
|
237
|
+
//#endregion
|
|
238
|
+
//#region src/core/agent-loop/recovery.ts
|
|
239
|
+
/** `page_info` actions for which `checkRedundantSnapshot` returns a "blocked" result. */
const REDUNDANT_PAGE_INFO_ACTIONS = new Set(["snapshot", "query_all", "get_url", "get_title", "get_viewport"]);
|
|
247
|
+
/**
 * Decide whether a `page_info` call is redundant and, if so, produce the
 * result to report for it.
 *
 * @param {string} toolName - Name of the requested tool.
 * @param {unknown} toolInput - Tool input; its `action` selects the page_info operation.
 * @param {unknown} _latestSnapshot - Unused.
 * @param {number} round - Current round index, echoed in the result details.
 * @returns {{ content: string, details: object } | null} A result with code
 *   `REDUNDANT_PAGE_INFO_SKIPPED` when the action is in
 *   `REDUNDANT_PAGE_INFO_ACTIONS`; otherwise `null`.
 */
function checkRedundantSnapshot(toolName, toolInput, _latestSnapshot, round) {
	const action = toolName === "page_info" ? getToolAction(toolInput) : void 0;
	if (!action || !REDUNDANT_PAGE_INFO_ACTIONS.has(action)) return null;
	const details = {
		code: "REDUNDANT_PAGE_INFO_SKIPPED",
		action,
		round
	};
	return {
		content: `page_info.${action} is blocked in loop execution. A snapshot is provided by the framework; continue with actionable tools directly.`,
		details
	};
}
|
|
263
|
+
/**
 * Track consecutive `page_info` snapshot calls and flag repeats.
 *
 * Any other tool call resets the counter to 0. From the second consecutive
 * snapshot call onward, the result is replaced by an error result (code
 * `REDUNDANT_SNAPSHOT`) whose content is the original content plus a notice.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Tool input.
 * @param {{ content: unknown }} result - Result of the executed tool.
 * @param {number} consecutiveCount - Snapshot calls seen in a row before this one.
 * @returns {{ result: object, consecutiveCount: number }} Possibly rewritten
 *   result together with the updated counter.
 */
function applySnapshotDebounce(toolName, toolInput, result, consecutiveCount) {
	const isSnapshotCall = toolName === "page_info" && getToolAction(toolInput) === "snapshot";
	if (!isSnapshotCall) return {
		result,
		consecutiveCount: 0
	};
	const newCount = consecutiveCount + 1;
	const isRepeat = newCount >= 2;
	if (!isRepeat) return {
		result,
		consecutiveCount: newCount
	};
	const notice = "Redundant snapshot detected. Continue with remaining actionable steps using the latest snapshot; avoid additional snapshot unless navigation or uncertainty changes.";
	return {
		consecutiveCount: newCount,
		result: {
			content: [toContentString(result.content), notice].join("\n"),
			details: {
				error: true,
				code: "REDUNDANT_SNAPSHOT",
				consecutiveSnapshotCalls: newCount
			}
		}
	};
}
|
|
290
|
+
/**
 * Recover from an element-not-found failure of a `dom` tool call.
 *
 * Attempts are counted per tool call key in `recoveryAttempts`. While the
 * count is within `DEFAULT_ACTION_RECOVERY_ROUNDS`, the function waits,
 * invokes `callbacks.onBeforeRecoverySnapshot` if present, re-reads the page
 * snapshot into `pageContext.latestSnapshot`, and returns a recovery result.
 * Beyond the limit it returns a "max recovery reached" result without waiting
 * or re-reading.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Tool input; may carry wait hints read by `resolveRecoveryWaitMs`.
 * @param {{ content: unknown }} result - Result of the failed call.
 * @param {Map<string, number>} recoveryAttempts - Attempt counters, mutated in place.
 * @param {object} registry - Tool registry passed to `readPageSnapshot`.
 * @param {{ latestSnapshot?: string }} pageContext - Mutated with the refreshed snapshot.
 * @param {{ onBeforeRecoverySnapshot?: Function }} [callbacks] - Optional hooks.
 * @returns {Promise<{ content: string, details: object } | null>} Replacement
 *   error result, or `null` when the call is not a `dom` element-not-found failure.
 */
async function handleElementRecovery(toolName, toolInput, result, recoveryAttempts, registry, pageContext, callbacks) {
	if (toolName !== "dom" || !isElementNotFoundResult(result)) return null;
	const key = buildToolCallKey(toolName, toolInput);
	const attempts = (recoveryAttempts.get(key) ?? 0) + 1;
	recoveryAttempts.set(key, attempts);
	const recoveryWaitMs = resolveRecoveryWaitMs(toolInput);
	// Both outcomes share the same shape; only the trailing note and code differ.
	const buildOutcome = (note, code) => ({
		content: [toContentString(result.content), note].join("\n"),
		details: {
			error: true,
			code,
			recoveryAttempt: attempts,
			recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
		}
	});
	if (attempts <= DEFAULT_ACTION_RECOVERY_ROUNDS) {
		await sleep$1(recoveryWaitMs);
		callbacks?.onBeforeRecoverySnapshot?.();
		pageContext.latestSnapshot = await readPageSnapshot(registry);
		return buildOutcome(`Recovery ${attempts}/${DEFAULT_ACTION_RECOVERY_ROUNDS}: snapshot refreshed, re-locate target.`, "ELEMENT_NOT_FOUND_RECOVERY");
	}
	return buildOutcome(`Max recovery attempts (${DEFAULT_ACTION_RECOVERY_ROUNDS}) reached. Try a different target.`, "ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED");
}
|
|
326
|
+
/**
 * Refresh the page snapshot after a successful navigation action.
 *
 * Applies only to the `navigate` tool with action `goto`, `back`, `forward`
 * or `reload` whose result is not flagged as an error. Despite the name, the
 * function only re-reads the snapshot; it does not read or update a URL.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Tool input carrying the `action`.
 * @param {object} result - Result of the executed tool.
 * @param {object} registry - Tool registry passed to `readPageSnapshot`.
 * @param {{ latestSnapshot?: string }} pageContext - Mutated with the refreshed snapshot.
 * @param {{ onBeforeRecoverySnapshot?: Function }} [callbacks] - Optional hooks.
 * @returns {Promise<void>}
 */
async function handleNavigationUrlChange(toolName, toolInput, result, registry, pageContext, callbacks) {
	if (toolName !== "navigate") return;
	const action = getToolAction(toolInput);
	const isNavigationAction = ["goto", "back", "forward", "reload"].includes(action);
	if (!isNavigationAction || hasToolError(result)) return;
	callbacks?.onBeforeRecoverySnapshot?.();
	pageContext.latestSnapshot = await readPageSnapshot(registry);
}
|
|
335
|
+
/** Tools treated as read-only when looking for idle rounds. */
const READ_ONLY_TOOLS = new Set(["page_info"]);
/**
 * Detect rounds that consist solely of read-only tool calls.
 *
 * @param {string[]} toolCallNames - Names of the tools called this round. An
 *   empty list counts as read-only.
 * @param {number} consecutiveReadOnlyRounds - Read-only rounds seen in a row before this one.
 * @returns {number} `0` when any non-read-only tool was called; otherwise the
 *   updated streak, or `-1` once the streak reaches 2 (the loop should stop).
 */
function detectIdleLoop(toolCallNames, consecutiveReadOnlyRounds) {
	const hasOtherTool = toolCallNames.some((name) => !READ_ONLY_TOOLS.has(name));
	if (hasOtherTool) return 0;
	const streak = consecutiveReadOnlyRounds + 1;
	if (streak >= 2) return -1;
	return streak;
}
|
|
349
|
+
|
|
185
350
|
//#endregion
|
|
186
351
|
//#region src/core/agent-loop/index.ts
|
|
187
352
|
/**
|
|
188
|
-
*
|
|
353
|
+
* Agent Loop 主流程(中)/ Core environment-agnostic agent loop (EN).
|
|
354
|
+
*
|
|
355
|
+
* 负责消息构建、AI 决策、工具执行、恢复保护与指标汇总。
|
|
356
|
+
* Orchestrates message build, AI decisions, tool execution, recovery, and metrics.
|
|
357
|
+
*
|
|
358
|
+
* 流程图(文本):
|
|
359
|
+
*
|
|
360
|
+
* 轮次开始
|
|
361
|
+
* │
|
|
362
|
+
* ├─ 确保快照可用
|
|
363
|
+
* ├─ 构建紧凑消息(目标 + 剩余任务 + 执行轨迹 + 快照)
|
|
364
|
+
* ├─ 调用模型
|
|
365
|
+
* ├─ 无 toolCalls ? 结束 : 执行工具
|
|
366
|
+
* ├─ 应用保护机制(冗余拦截/恢复/导航检测/空转/防自转)
|
|
367
|
+
* ├─ 刷新快照
|
|
368
|
+
* ▼
|
|
369
|
+
* 下一轮或停机
|
|
370
|
+
*/
|
|
371
|
+
/**
|
|
372
|
+
* 执行 Agent 循环(中)/ Execute the agent loop (EN).
|
|
189
373
|
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
* 2. 循环:发消息给 AI → 检查是否返回 tool_call → 执行 → 反馈 → 继续
|
|
193
|
-
* 3. AI 不再调用工具时,返回最终回复
|
|
374
|
+
* 每轮:确保快照 → 构建消息 → 调用 AI → 执行工具 → 保护处理 → 刷新快照。
|
|
375
|
+
* Per round: ensure snapshot -> build messages -> call AI -> execute tools -> apply protections -> refresh snapshot.
|
|
194
376
|
*/
|
|
195
377
|
async function executeAgentLoop(params) {
|
|
196
|
-
const { client, registry, systemPrompt, message, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
|
|
378
|
+
const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
|
|
197
379
|
const tools = registry.getDefinitions();
|
|
198
380
|
const allToolCalls = [];
|
|
199
381
|
const fullToolTrace = [];
|
|
200
382
|
const actionRecoveryAttempts = /* @__PURE__ */ new Map();
|
|
201
|
-
const pageContext = {};
|
|
383
|
+
const pageContext = { latestSnapshot: initialSnapshot };
|
|
202
384
|
let finalReply = "";
|
|
385
|
+
let consecutiveSnapshotCalls = 0;
|
|
386
|
+
let consecutiveReadOnlyRounds = 0;
|
|
387
|
+
let usedRounds = 0;
|
|
388
|
+
let inputTokens = 0;
|
|
389
|
+
let outputTokens = 0;
|
|
390
|
+
let remainingInstruction = message.trim();
|
|
391
|
+
let previousRoundTasks = [];
|
|
392
|
+
let previousRoundPlannedTasks = [];
|
|
393
|
+
let previousRoundModelOutput = "";
|
|
394
|
+
let lastPlannedBatchKey = "";
|
|
395
|
+
let consecutiveSamePlannedBatch = 0;
|
|
396
|
+
let lastRoundHadError = false;
|
|
397
|
+
let protocolViolationHint;
|
|
398
|
+
let recoveryCount = 0;
|
|
399
|
+
let redundantInterceptCount = 0;
|
|
400
|
+
let pendingNotFoundRetry;
|
|
401
|
+
let snapshotReadCount = 0;
|
|
402
|
+
let snapshotSizeTotal = 0;
|
|
403
|
+
let snapshotSizeMax = 0;
|
|
404
|
+
/**
|
|
405
|
+
* 记录快照统计(中)/ Record snapshot metrics (EN).
|
|
406
|
+
*
|
|
407
|
+
* 用于输出可观测指标:读取次数、平均长度、最大长度。
|
|
408
|
+
* Used for observability metrics: read count, avg size, max size.
|
|
409
|
+
*/
|
|
410
|
+
const recordSnapshotStats = (snapshot) => {
|
|
411
|
+
if (typeof snapshot !== "string") return;
|
|
412
|
+
snapshotReadCount += 1;
|
|
413
|
+
snapshotSizeTotal += snapshot.length;
|
|
414
|
+
if (snapshot.length > snapshotSizeMax) snapshotSizeMax = snapshot.length;
|
|
415
|
+
};
|
|
416
|
+
/**
|
|
417
|
+
* 刷新页面快照(中)/ Refresh page snapshot (EN).
|
|
418
|
+
*
|
|
419
|
+
* 只做两件事:读取最新快照 + 更新快照统计。
|
|
420
|
+
* Does exactly two things: read latest snapshot + update metrics.
|
|
421
|
+
*/
|
|
422
|
+
const refreshSnapshot = async () => {
|
|
423
|
+
pageContext.latestSnapshot = await readPageSnapshot(registry);
|
|
424
|
+
recordSnapshotStats(pageContext.latestSnapshot);
|
|
425
|
+
};
|
|
426
|
+
if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
|
|
427
|
+
/**
|
|
428
|
+
* 追加工具轨迹(中)/ Append tool trace entry (EN).
|
|
429
|
+
*
|
|
430
|
+
* 同时写入:
|
|
431
|
+
* - allToolCalls:对外返回结果
|
|
432
|
+
* - fullToolTrace:下一轮消息上下文
|
|
433
|
+
*/
|
|
434
|
+
const appendToolTrace = (round, name, input, result) => {
|
|
435
|
+
allToolCalls.push({
|
|
436
|
+
name,
|
|
437
|
+
input,
|
|
438
|
+
result
|
|
439
|
+
});
|
|
440
|
+
fullToolTrace.push({
|
|
441
|
+
round,
|
|
442
|
+
name,
|
|
443
|
+
input,
|
|
444
|
+
result
|
|
445
|
+
});
|
|
446
|
+
};
|
|
447
|
+
/**
|
|
448
|
+
* 生成任务数组(中)/ Build normalized task array (EN).
|
|
449
|
+
*
|
|
450
|
+
* 将本轮 toolCalls 归一化成稳定字符串数组,便于:
|
|
451
|
+
* - 回传到下一轮消息上下文(提醒已执行计划)
|
|
452
|
+
* - 进行“是否与上一轮完全相同”的比较
|
|
453
|
+
*/
|
|
454
|
+
const buildTaskArray = (toolCalls) => toolCalls.map((tc) => {
|
|
455
|
+
const inputText = JSON.stringify(tc.input);
|
|
456
|
+
return `${tc.name}:${inputText}`;
|
|
457
|
+
});
|
|
458
|
+
/**
|
|
459
|
+
* 规范化模型文本输出(中)/ Normalize model text for next-round input (EN).
|
|
460
|
+
*
|
|
461
|
+
* 优先保留 REMAINING 行;否则截断首段文本,避免长篇规划污染下一轮输入。
|
|
462
|
+
* Prefer REMAINING line; otherwise keep a short excerpt to avoid long planning spillover.
|
|
463
|
+
*/
|
|
464
|
+
const normalizeModelOutput = (text) => {
|
|
465
|
+
if (!text) return "";
|
|
466
|
+
const trimmed = text.trim();
|
|
467
|
+
if (!trimmed) return "";
|
|
468
|
+
const remainingMatch = trimmed.match(/REMAINING\s*:\s*([\s\S]*)$/i);
|
|
469
|
+
if (remainingMatch) return `REMAINING: ${remainingMatch[1].trim()}`;
|
|
470
|
+
return (trimmed.split(/\n\s*\n/)[0]?.trim() ?? trimmed).slice(0, 220);
|
|
471
|
+
};
|
|
472
|
+
/**
|
|
473
|
+
* 判定动作是否会触发 DOM 结构变化(中)/ Whether action may cause DOM-shape change (EN).
|
|
474
|
+
*
|
|
475
|
+
* 触发后应强制断轮,等待下一轮新快照继续。
|
|
476
|
+
* Force round break after such action and continue with refreshed snapshot next round.
|
|
477
|
+
*/
|
|
478
|
+
const shouldForceRoundBreak = (toolName, toolInput) => {
|
|
479
|
+
const action = getToolAction(toolInput);
|
|
480
|
+
if (toolName === "navigate") return action === "goto" || action === "back" || action === "forward" || action === "reload";
|
|
481
|
+
if (toolName === "dom") return action === "click" || action === "press";
|
|
482
|
+
if (toolName === "evaluate") return true;
|
|
483
|
+
return false;
|
|
484
|
+
};
|
|
485
|
+
/**
|
|
486
|
+
* 将“找不到元素”的失败任务整理成可重试清单(中)/ Build retry task list for not-found failures (EN).
|
|
487
|
+
*/
|
|
488
|
+
const collectMissingTask = (name, input, result) => {
|
|
489
|
+
if (!isElementNotFoundResult(result)) return null;
|
|
490
|
+
return {
|
|
491
|
+
name,
|
|
492
|
+
input,
|
|
493
|
+
reason: toContentString(result.content).slice(0, 240)
|
|
494
|
+
};
|
|
495
|
+
};
|
|
496
|
+
/**
 * Parse the REMAINING protocol marker from model text.
 *
 * Supported forms (marker is case-insensitive):
 * - `REMAINING: <text>` → returns everything after the marker, trimmed.
 * - `REMAINING: DONE`   → returns `""` (nothing left to do).
 *
 * DONE is recognised on the first line after the marker only, and tolerates
 * trailing punctuation or markdown quoting (`DONE.`, `DONE**`, "DONE`"). This
 * way a reply such as "REMAINING: DONE" followed by a summary line is treated
 * as finished instead of turning the summary into the next instruction.
 *
 * @param {string | undefined | null} text - Model text output.
 * @returns {string | null} Remaining instruction, `""` when done, or `null`
 *   when the text carries no REMAINING marker.
 */
const parseRemainingInstruction = (text) => {
	if (!text) return null;
	const match = text.match(/REMAINING\s*:\s*([\s\S]*)$/i);
	if (!match) return null;
	const value = match[1].trim();
	const firstLine = value.split(/\r?\n/, 1)[0].trim();
	if (/^done[\s.!。!*`_"']*$/i.test(firstLine)) return "";
	return value;
};
|
|
511
|
+
/**
|
|
512
|
+
* 推进下一轮描述(中)/ Derive next-round instruction from model text (EN).
|
|
513
|
+
*
|
|
514
|
+
* 优先 REMAINING 协议;若未提供,则保持当前 remaining 不变。
|
|
515
|
+
* Priority: REMAINING protocol first; otherwise keep current remaining instruction unchanged.
|
|
516
|
+
*/
|
|
517
|
+
const deriveNextInstruction = (text, currentInstruction) => {
|
|
518
|
+
const parsed = parseRemainingInstruction(text);
|
|
519
|
+
if (parsed !== null) return {
|
|
520
|
+
nextInstruction: parsed,
|
|
521
|
+
hasRemainingProtocol: true
|
|
522
|
+
};
|
|
523
|
+
return {
|
|
524
|
+
nextInstruction: currentInstruction,
|
|
525
|
+
hasRemainingProtocol: false
|
|
526
|
+
};
|
|
527
|
+
};
|
|
528
|
+
/**
|
|
529
|
+
* 启发式任务剔除(中)/ Heuristic remaining reduction for linear instructions (EN).
|
|
530
|
+
*
|
|
531
|
+
* 在 REMAINING 缺失但本轮有执行动作时,按“线性片段”剔除已执行步数,避免下一轮继续携带整段原任务。
|
|
532
|
+
* When REMAINING is missing but actions were executed, drop executed step count from a linearized instruction.
|
|
533
|
+
*/
|
|
534
|
+
const reduceRemainingHeuristically = (currentInstruction, executedCount) => {
|
|
535
|
+
if (!currentInstruction.trim() || executedCount <= 0) return currentInstruction;
|
|
536
|
+
const parts = currentInstruction.replace(/\s+/g, " ").replace(/(->|=>|→)/g, " 然后 ").replace(/[,,。;;]/g, " 然后 ").split(/\s*(?:然后|再|并且|并|接着|随后|之后)\s*/g).map((part) => part.trim()).filter(Boolean);
|
|
537
|
+
if (parts.length <= 1) return currentInstruction;
|
|
538
|
+
const nextParts = parts.slice(Math.min(executedCount, parts.length));
|
|
539
|
+
if (nextParts.length === 0) return "";
|
|
540
|
+
return nextParts.join(" -> ");
|
|
541
|
+
};
|
|
203
542
|
for (let round = 0; round < maxRounds; round++) {
|
|
204
543
|
callbacks?.onRound?.(round);
|
|
205
|
-
|
|
206
|
-
|
|
544
|
+
usedRounds = round + 1;
|
|
545
|
+
if (!pageContext.latestSnapshot) await refreshSnapshot();
|
|
546
|
+
const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
|
|
547
|
+
const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint);
|
|
548
|
+
if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
|
|
549
|
+
role: "user",
|
|
550
|
+
content: [
|
|
551
|
+
"## Not-found retry context",
|
|
552
|
+
`Retry attempt: ${pendingNotFoundRetry.attempt}/${DEFAULT_NOT_FOUND_RETRY_ROUNDS}`,
|
|
553
|
+
"These tool targets were not found in previous execution:",
|
|
554
|
+
...pendingNotFoundRetry.tasks.map((task, i) => `${i + 1}. ${task.name}(${JSON.stringify(task.input)}) -> ${task.reason}`),
|
|
555
|
+
"Only retry unresolved targets that are now visible in the latest snapshot.",
|
|
556
|
+
"If still not found, return no tool calls and include REMAINING with the unresolved part."
|
|
557
|
+
].join("\n")
|
|
558
|
+
});
|
|
207
559
|
const response = await client.chat({
|
|
208
560
|
systemPrompt: effectivePrompt,
|
|
209
561
|
messages: chatMessages,
|
|
210
562
|
tools
|
|
211
563
|
});
|
|
564
|
+
inputTokens += response.usage?.inputTokens ?? 0;
|
|
565
|
+
outputTokens += response.usage?.outputTokens ?? 0;
|
|
566
|
+
const parsedInstructionState = deriveNextInstruction(response.text, remainingInstruction);
|
|
212
567
|
if (!response.toolCalls || response.toolCalls.length === 0) {
|
|
568
|
+
if (pendingNotFoundRetry) {
|
|
569
|
+
const unresolvedHint = response.text?.toLowerCase() ?? "";
|
|
570
|
+
if ((unresolvedHint.includes("找不到") || unresolvedHint.includes("未找到") || unresolvedHint.includes("not found") || unresolvedHint.includes("cannot find") || unresolvedHint.includes("unable to locate")) && pendingNotFoundRetry.attempt < DEFAULT_NOT_FOUND_RETRY_ROUNDS) {
|
|
571
|
+
pendingNotFoundRetry = {
|
|
572
|
+
...pendingNotFoundRetry,
|
|
573
|
+
attempt: pendingNotFoundRetry.attempt + 1
|
|
574
|
+
};
|
|
575
|
+
callbacks?.onText?.(`未命中目标,准备第 ${pendingNotFoundRetry.attempt} 次重试(等待 ${DEFAULT_NOT_FOUND_RETRY_WAIT_MS}ms)...`);
|
|
576
|
+
await sleep$1(DEFAULT_NOT_FOUND_RETRY_WAIT_MS);
|
|
577
|
+
await refreshSnapshot();
|
|
578
|
+
continue;
|
|
579
|
+
}
|
|
580
|
+
pendingNotFoundRetry = void 0;
|
|
581
|
+
}
|
|
582
|
+
if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
|
|
583
|
+
if (remainingInstruction.trim().length > 0 && round < maxRounds - 1) {
|
|
584
|
+
protocolViolationHint = [
|
|
585
|
+
"Protocol violation in previous round:",
|
|
586
|
+
"- Remaining task is not DONE, but no tool calls were returned.",
|
|
587
|
+
"This round MUST do one of:",
|
|
588
|
+
"1) Return actionable tool calls for visible targets; or",
|
|
589
|
+
"2) If truly complete, return a short summary and EXACTLY `REMAINING: DONE`.",
|
|
590
|
+
"Do NOT output planning/explaining text."
|
|
591
|
+
].join("\n");
|
|
592
|
+
lastRoundHadError = true;
|
|
593
|
+
await refreshSnapshot();
|
|
594
|
+
continue;
|
|
595
|
+
}
|
|
213
596
|
finalReply = response.text ?? "";
|
|
214
597
|
if (finalReply) callbacks?.onText?.(finalReply);
|
|
215
598
|
break;
|
|
216
599
|
}
|
|
217
|
-
|
|
600
|
+
protocolViolationHint = void 0;
|
|
601
|
+
const plannedTasksCurrentRound = buildTaskArray(response.toolCalls.map((tc) => ({
|
|
602
|
+
name: tc.name,
|
|
603
|
+
input: tc.input
|
|
604
|
+
})));
|
|
605
|
+
const plannedBatchKey = JSON.stringify(response.toolCalls.map((tc) => ({
|
|
606
|
+
name: tc.name,
|
|
607
|
+
input: tc.input
|
|
608
|
+
})));
|
|
609
|
+
if (plannedBatchKey === lastPlannedBatchKey) consecutiveSamePlannedBatch += 1;
|
|
610
|
+
else {
|
|
611
|
+
consecutiveSamePlannedBatch = 1;
|
|
612
|
+
lastPlannedBatchKey = plannedBatchKey;
|
|
613
|
+
}
|
|
614
|
+
if (consecutiveSamePlannedBatch >= 2 && !lastRoundHadError) {
|
|
615
|
+
finalReply = response.text?.trim() || "任务已完成。";
|
|
616
|
+
if (finalReply) callbacks?.onText?.(finalReply);
|
|
617
|
+
break;
|
|
618
|
+
}
|
|
218
619
|
if (dryRun) {
|
|
219
620
|
finalReply = response.text ? response.text + "\n\n" : "";
|
|
220
621
|
finalReply += "🔧 AI 请求调用以下工具(dry-run 模式,未执行):\n";
|
|
@@ -229,103 +630,64 @@ async function executeAgentLoop(params) {
|
|
|
229
630
|
}
|
|
230
631
|
break;
|
|
231
632
|
}
|
|
633
|
+
let roundHasError = false;
|
|
634
|
+
const executedTaskCalls = [];
|
|
635
|
+
const roundMissingTasks = [];
|
|
232
636
|
for (const tc of response.toolCalls) {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
callbacks?.onBeforeRecoverySnapshot?.(latestUrl);
|
|
240
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
241
|
-
if (tc.name === "dom") {
|
|
242
|
-
const result = {
|
|
243
|
-
content: `URL 已变更为 ${latestUrl},请基于最新快照重新定位目标元素。`,
|
|
244
|
-
details: {
|
|
245
|
-
error: true,
|
|
246
|
-
code: "URL_CHANGED_REQUIRE_NEW_SNAPSHOT",
|
|
247
|
-
url: latestUrl
|
|
248
|
-
}
|
|
249
|
-
};
|
|
250
|
-
allToolCalls.push({
|
|
251
|
-
name: tc.name,
|
|
252
|
-
input: tc.input,
|
|
253
|
-
result
|
|
254
|
-
});
|
|
255
|
-
fullToolTrace.push({
|
|
256
|
-
round,
|
|
257
|
-
name: tc.name,
|
|
258
|
-
input: tc.input,
|
|
259
|
-
result,
|
|
260
|
-
marker: "[URL变化待重定位]"
|
|
261
|
-
});
|
|
262
|
-
callbacks?.onToolResult?.(tc.name, result);
|
|
263
|
-
continue;
|
|
264
|
-
}
|
|
265
|
-
}
|
|
637
|
+
const redundant = checkRedundantSnapshot(tc.name, tc.input, pageContext.latestSnapshot, round);
|
|
638
|
+
if (redundant) {
|
|
639
|
+
appendToolTrace(round, tc.name, tc.input, redundant);
|
|
640
|
+
redundantInterceptCount += 1;
|
|
641
|
+
callbacks?.onToolResult?.(tc.name, redundant);
|
|
642
|
+
continue;
|
|
266
643
|
}
|
|
644
|
+
callbacks?.onToolCall?.(tc.name, tc.input);
|
|
267
645
|
let result = await registry.dispatch(tc.name, tc.input);
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
277
|
-
result = {
|
|
278
|
-
content: [
|
|
279
|
-
toContentString(result.content),
|
|
280
|
-
"",
|
|
281
|
-
`自动恢复 ${attempts}/${DEFAULT_ACTION_RECOVERY_ROUNDS}:已刷新快照,请重新定位目标元素。`
|
|
282
|
-
].join("\n"),
|
|
283
|
-
details: {
|
|
284
|
-
error: true,
|
|
285
|
-
code: "ELEMENT_NOT_FOUND_RECOVERY",
|
|
286
|
-
recoveryAttempt: attempts,
|
|
287
|
-
recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
|
|
288
|
-
}
|
|
289
|
-
};
|
|
290
|
-
} else result = {
|
|
291
|
-
content: [
|
|
292
|
-
toContentString(result.content),
|
|
293
|
-
"",
|
|
294
|
-
`已达到最大自动恢复次数(${DEFAULT_ACTION_RECOVERY_ROUNDS})。请调整操作目标后重试。`
|
|
295
|
-
].join("\n"),
|
|
296
|
-
details: {
|
|
297
|
-
error: true,
|
|
298
|
-
code: "ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED",
|
|
299
|
-
recoveryAttempt: attempts,
|
|
300
|
-
recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
|
|
301
|
-
}
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
allToolCalls.push({
|
|
646
|
+
const debounced = applySnapshotDebounce(tc.name, tc.input, result, consecutiveSnapshotCalls);
|
|
647
|
+
result = debounced.result;
|
|
648
|
+
consecutiveSnapshotCalls = debounced.consecutiveCount;
|
|
649
|
+
const recovered = await handleElementRecovery(tc.name, tc.input, result, actionRecoveryAttempts, registry, pageContext, callbacks);
|
|
650
|
+
if (recovered) result = recovered;
|
|
651
|
+
if (recovered?.details && typeof recovered.details === "object" && recovered.details.code === "ELEMENT_NOT_FOUND_RECOVERY") recoveryCount += 1;
|
|
652
|
+
appendToolTrace(round, tc.name, tc.input, result);
|
|
653
|
+
executedTaskCalls.push({
|
|
305
654
|
name: tc.name,
|
|
306
|
-
input: tc.input
|
|
307
|
-
result
|
|
308
|
-
});
|
|
309
|
-
fullToolTrace.push({
|
|
310
|
-
round,
|
|
311
|
-
name: tc.name,
|
|
312
|
-
input: tc.input,
|
|
313
|
-
result
|
|
655
|
+
input: tc.input
|
|
314
656
|
});
|
|
315
|
-
|
|
316
|
-
if (
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
pageContext.currentUrl = newUrl;
|
|
322
|
-
callbacks?.onBeforeRecoverySnapshot?.(newUrl);
|
|
323
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
324
|
-
}
|
|
325
|
-
}
|
|
657
|
+
const missingTask = collectMissingTask(tc.name, tc.input, result);
|
|
658
|
+
if (missingTask) roundMissingTasks.push(missingTask);
|
|
659
|
+
if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
|
|
660
|
+
if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
|
|
661
|
+
pageContext.latestSnapshot = toContentString(result.content);
|
|
662
|
+
recordSnapshotStats(pageContext.latestSnapshot);
|
|
326
663
|
}
|
|
664
|
+
await handleNavigationUrlChange(tc.name, tc.input, result, registry, pageContext, callbacks);
|
|
327
665
|
callbacks?.onToolResult?.(tc.name, result);
|
|
666
|
+
if (shouldForceRoundBreak(tc.name, tc.input)) break;
|
|
667
|
+
}
|
|
668
|
+
if (roundMissingTasks.length > 0) pendingNotFoundRetry = {
|
|
669
|
+
attempt: 1,
|
|
670
|
+
tasks: roundMissingTasks
|
|
671
|
+
};
|
|
672
|
+
else pendingNotFoundRetry = void 0;
|
|
673
|
+
if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
|
|
674
|
+
else {
|
|
675
|
+
const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, executedTaskCalls.length);
|
|
676
|
+
if (nextByHeuristic !== remainingInstruction) remainingInstruction = nextByHeuristic;
|
|
677
|
+
else roundHasError = true;
|
|
328
678
|
}
|
|
679
|
+
previousRoundModelOutput = parsedInstructionState.hasRemainingProtocol ? normalizeModelOutput(response.text) : `REMAINING: ${remainingInstruction || "DONE"}`;
|
|
680
|
+
lastRoundHadError = roundHasError;
|
|
681
|
+
previousRoundTasks = buildTaskArray(executedTaskCalls);
|
|
682
|
+
previousRoundPlannedTasks = plannedTasksCurrentRound;
|
|
683
|
+
const idleResult = detectIdleLoop(executedTaskCalls.map((tc) => tc.name), consecutiveReadOnlyRounds);
|
|
684
|
+
if (idleResult === -1) {
|
|
685
|
+
finalReply = response.text || "任务已完成。";
|
|
686
|
+
if (finalReply) callbacks?.onText?.(finalReply);
|
|
687
|
+
break;
|
|
688
|
+
}
|
|
689
|
+
consecutiveReadOnlyRounds = idleResult;
|
|
690
|
+
await refreshSnapshot();
|
|
329
691
|
}
|
|
330
692
|
const resultMessages = [...history ?? [], {
|
|
331
693
|
role: "user",
|
|
@@ -335,70 +697,146 @@ async function executeAgentLoop(params) {
|
|
|
335
697
|
role: "assistant",
|
|
336
698
|
content: finalReply
|
|
337
699
|
});
|
|
700
|
+
const successfulToolCalls = allToolCalls.filter((tc) => {
|
|
701
|
+
const details = tc.result.details;
|
|
702
|
+
return !(details && typeof details === "object" && Boolean(details.error));
|
|
703
|
+
}).length;
|
|
704
|
+
const failedToolCalls = allToolCalls.length - successfulToolCalls;
|
|
705
|
+
const metrics = {
|
|
706
|
+
roundCount: usedRounds,
|
|
707
|
+
totalToolCalls: allToolCalls.length,
|
|
708
|
+
successfulToolCalls,
|
|
709
|
+
failedToolCalls,
|
|
710
|
+
toolSuccessRate: allToolCalls.length > 0 ? Number((successfulToolCalls / allToolCalls.length).toFixed(4)) : 1,
|
|
711
|
+
recoveryCount,
|
|
712
|
+
redundantInterceptCount,
|
|
713
|
+
snapshotReadCount,
|
|
714
|
+
latestSnapshotSize: pageContext.latestSnapshot?.length ?? 0,
|
|
715
|
+
avgSnapshotSize: snapshotReadCount > 0 ? Math.round(snapshotSizeTotal / snapshotReadCount) : 0,
|
|
716
|
+
maxSnapshotSize: snapshotSizeMax,
|
|
717
|
+
inputTokens,
|
|
718
|
+
outputTokens
|
|
719
|
+
};
|
|
720
|
+
callbacks?.onMetrics?.(metrics);
|
|
338
721
|
return {
|
|
339
722
|
reply: finalReply,
|
|
340
723
|
toolCalls: allToolCalls,
|
|
341
|
-
messages: resultMessages
|
|
724
|
+
messages: resultMessages,
|
|
725
|
+
metrics
|
|
342
726
|
};
|
|
343
727
|
}
|
|
344
728
|
|
|
345
729
|
//#endregion
|
|
346
730
|
//#region src/core/ai-client/constants.ts
|
|
347
|
-
/**
|
|
348
|
-
* 各 Provider 的默认 API 端点。
|
|
349
|
-
*
|
|
350
|
-
* - openai → OpenAI 官方 API
|
|
351
|
-
* - copilot → GitHub Models API(使用 OpenAI 兼容格式)
|
|
352
|
-
* - anthropic → Anthropic Messages API
|
|
353
|
-
*/
|
|
731
|
+
/** 默认端点映射(中)/ Default API endpoints by provider (EN). */
|
|
354
732
|
const PROVIDER_ENDPOINTS = {
|
|
355
733
|
openai: "https://api.openai.com/v1",
|
|
356
734
|
copilot: "https://models.inference.ai.azure.com",
|
|
357
735
|
anthropic: "https://api.anthropic.com",
|
|
358
736
|
deepseek: "https://api.deepseek.com"
|
|
359
737
|
};
|
|
360
|
-
/**
|
|
361
|
-
* 校验 provider 是否受支持。
|
|
362
|
-
*
|
|
363
|
-
* @throws 不支持的 provider 抛出 Error,附带支持列表
|
|
364
|
-
*/
|
|
738
|
+
/**
 * Validate that `provider` is one of the built-in providers.
 *
 * Only own keys of PROVIDER_ENDPOINTS with a non-empty endpoint are accepted;
 * a plain truthiness lookup would also accept names inherited from
 * Object.prototype (for example "constructor" or "toString").
 *
 * @param {string} provider - Provider name to check.
 * @throws {Error} `Unknown AI provider: <name>. Supported: <list>` when the provider is not supported.
 */
function validateProvider(provider) {
	const isOwnKey = Object.prototype.hasOwnProperty.call(PROVIDER_ENDPOINTS, provider);
	if (isOwnKey && PROVIDER_ENDPOINTS[provider]) return;
	const supported = Object.keys(PROVIDER_ENDPOINTS).join(", ");
	throw new Error(`Unknown AI provider: ${provider}. Supported: ${supported}`);
}
|
|
371
|
-
/**
|
|
372
|
-
* 解析 provider 对应的 API 基础 URL。
|
|
373
|
-
*
|
|
374
|
-
* 优先使用用户自定义的 baseURL(如本地 Ollama),
|
|
375
|
-
* 其次使用 PROVIDER_ENDPOINTS 中的默认值。
|
|
376
|
-
*/
|
|
745
|
+
/**
 * Resolve the API base URL for a client config.
 *
 * A user-supplied `config.baseURL` wins (any value other than null/undefined,
 * including an empty string). Otherwise the default endpoint registered for
 * `config.provider` in PROVIDER_ENDPOINTS is used, and "" when there is none.
 * The provider lookup is restricted to own keys so that names inherited from
 * Object.prototype (e.g. "constructor") cannot resolve to a non-string value.
 *
 * @param {{baseURL?: string, provider?: string}} config - Client configuration.
 * @returns {string} The base URL, or "" when nothing is configured.
 */
function resolveBaseURL(config) {
	if (config.baseURL !== void 0 && config.baseURL !== null) return config.baseURL;
	const provider = config.provider;
	if (!Object.prototype.hasOwnProperty.call(PROVIDER_ENDPOINTS, provider)) return "";
	return PROVIDER_ENDPOINTS[provider] ?? "";
}
|
|
380
749
|
/**
 * Strip non-serializable fields from a schema via a JSON round trip.
 *
 * Anything JSON.stringify drops (symbol keys, functions, undefined values) is
 * removed from the returned copy. When the input itself is not serializable
 * (JSON.stringify returns undefined, e.g. for `undefined`), `undefined` is
 * returned instead of letting JSON.parse throw on a non-JSON value.
 *
 * @param {unknown} schema - Schema object to clean.
 * @returns {unknown} A plain JSON-compatible copy, or undefined when there is nothing to serialize.
 */
function cleanSchema(schema) {
	const serialized = JSON.stringify(schema);
	if (serialized === void 0) return void 0;
	return JSON.parse(serialized);
}
|
|
392
755
|
|
|
393
756
|
//#endregion
|
|
394
|
-
//#region src/core/ai-client/
|
|
757
|
+
//#region src/core/ai-client/sse.ts
|
|
395
758
|
/**
 * Generic consumer for SSE streams whose `data:` payloads are JSON.
 *
 * Reads `response.body` chunk by chunk, splits it into lines, assembles
 * `event:` / `data:` fields into events (dispatched on a blank line) and hands
 * each parsed payload to `onEvent`. Lines starting with ":" are ignored.
 * Events still pending when the stream ends are dispatched as well, including
 * a final line that is not newline-terminated.
 *
 * Payloads that are not valid JSON, and errors thrown by `onEvent`, cause the
 * event to be skipped (best effort); they do not abort the stream.
 *
 * @param {{body: ReadableStream<Uint8Array>|null}} response - Response to read; returns immediately when it has no body.
 * @param {(data: any, meta: {event: string|undefined, rawData: string}) => any} onEvent
 *   Called once per event (may be async). Returning exactly `false` stops consumption.
 * @param {{stopOnDone?: boolean, readTimeoutMs?: number}} [options]
 *   `stopOnDone` (default true) stops at a `[DONE]` payload.
 *   `readTimeoutMs` bounds each individual read; falsy or <= 0 disables the timeout.
 * @returns {Promise<void>}
 * @throws {Error} `SSE read timeout (<n>ms)` when a read exceeds `readTimeoutMs`;
 *   errors raised by the underlying stream are rethrown. In both cases the
 *   reader is cancelled first so the connection is not left open.
 */
async function consumeSSEJSON(response, onEvent, options = {}) {
	if (!response.body) return;
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	const stopOnDone = options.stopOnDone ?? true;
	const readTimeoutMs = options.readTimeoutMs;
	let buffer = "";
	let currentEvent;
	let dataLines = [];
	// Set when `[DONE]` is seen or the handler returns false; ends BOTH the
	// per-line loop and the outer read loop.
	let stopped = false;

	/** Read the next chunk, rejecting if it takes longer than readTimeoutMs. */
	function readChunk() {
		if (!readTimeoutMs || readTimeoutMs <= 0) return reader.read();
		return new Promise((resolve, reject) => {
			const timer = setTimeout(() => {
				reject(new Error(`SSE read timeout (${readTimeoutMs}ms)`));
			}, readTimeoutMs);
			reader.read().then((chunk) => {
				clearTimeout(timer);
				resolve(chunk);
			}, (error) => {
				clearTimeout(timer);
				reject(error);
			});
		});
	}

	/** Dispatch the event assembled so far (if any) and reset the assembly state. */
	async function flushEvent() {
		const rawData = dataLines.join("\n").trim();
		const event = currentEvent;
		dataLines = [];
		currentEvent = void 0;
		if (!rawData) return;
		if (stopOnDone && rawData === "[DONE]") {
			stopped = true;
			return;
		}
		try {
			const verdict = await onEvent(JSON.parse(rawData), {
				event,
				rawData
			});
			if (verdict === false) stopped = true;
		} catch {
			// Best effort: skip payloads that are not JSON or that the handler rejects.
		}
	}

	/** Interpret one line of the stream (without its trailing "\n"). */
	async function handleLine(rawLine) {
		const line = (rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine).trim();
		if (!line) {
			await flushEvent();
			return;
		}
		if (line.startsWith(":")) return;
		if (line.startsWith("event:")) {
			currentEvent = line.slice(6).trim() || void 0;
			return;
		}
		if (line.startsWith("data:")) dataLines.push(line.slice(5).trimStart());
	}

	try {
		while (!stopped) {
			const { done, value } = await readChunk();
			if (done) break;
			buffer += decoder.decode(value, { stream: true });
			const lines = buffer.split("\n");
			// The last piece may be an incomplete line; keep it for the next chunk.
			buffer = lines.pop() ?? "";
			for (const rawLine of lines) {
				await handleLine(rawLine);
				if (stopped) break;
			}
		}
		if (!stopped) {
			// End of stream: a final line without a trailing newline is still in
			// `buffer`; process it before the last flush so it is not lost.
			const tail = buffer + decoder.decode();
			buffer = "";
			if (tail) await handleLine(tail);
			if (!stopped) await flushEvent();
		}
	} catch (error) {
		// Timeout or stream failure: release the underlying connection.
		await reader.cancel(error).catch(() => void 0);
		throw error;
	}
	// Early stop: the rest of the stream is not needed.
	if (stopped) await reader.cancel().catch(() => void 0);
}
|
|
835
|
+
|
|
836
|
+
//#endregion
|
|
837
|
+
//#region src/core/ai-client/custom.ts
|
|
838
|
+
/**
|
|
839
|
+
* BaseAIClient 实现(中)/ BaseAIClient implementation of AIClient (EN).
|
|
402
840
|
*/
|
|
403
841
|
var BaseAIClient = class {
|
|
404
842
|
/** 用户提供的对话处理函数 */
|
|
@@ -407,47 +845,21 @@ var BaseAIClient = class {
|
|
|
407
845
|
this.chatHandler = options.chatHandler;
|
|
408
846
|
}
|
|
409
847
|
/**
|
|
410
|
-
*
|
|
411
|
-
*
|
|
412
|
-
* 默认实现直接委托给 `chatHandler`。
|
|
413
|
-
* 子类可覆盖此方法添加中间件逻辑(日志、重试、缓存等)。
|
|
414
|
-
*
|
|
415
|
-
* @param params - 统一格式的聊天参数
|
|
416
|
-
* @returns 统一格式的 AI 响应
|
|
848
|
+
* 发送对话请求(中)/ Dispatch chat request via handler (EN).
|
|
417
849
|
*/
|
|
418
850
|
async chat(params) {
|
|
419
851
|
return this.chatHandler(params);
|
|
420
852
|
}
|
|
853
|
+
/** SSE 消费复用入口(中)/ Reusable SSE(JSON) consumer for subclasses (EN). */
|
|
854
|
+
async consumeSSEJSON(response, onEvent, options) {
|
|
855
|
+
return consumeSSEJSON(response, onEvent, options);
|
|
856
|
+
}
|
|
421
857
|
};
|
|
422
858
|
|
|
423
859
|
//#endregion
|
|
424
860
|
//#region src/core/ai-client/openai.ts
|
|
425
861
|
/**
|
|
426
|
-
*
|
|
427
|
-
*
|
|
428
|
-
* 封装完整的 OpenAI Chat Completions API 调用流程:
|
|
429
|
-
* 1. buildOpenAIRequest() → 构建 HTTP 请求
|
|
430
|
-
* 2. fetch() → 发送请求
|
|
431
|
-
* 3. parseOpenAIResponse() → 解析响应为统一格式
|
|
432
|
-
*
|
|
433
|
-
* 使用示例:
|
|
434
|
-
* ```ts
|
|
435
|
-
* const client = new OpenAIClient({
|
|
436
|
-
* provider: "openai",
|
|
437
|
-
* model: "gpt-4o",
|
|
438
|
-
* apiKey: "sk-xxx",
|
|
439
|
-
* });
|
|
440
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
441
|
-
* ```
|
|
442
|
-
*
|
|
443
|
-
* 也可用于 Copilot(GitHub Models):
|
|
444
|
-
* ```ts
|
|
445
|
-
* const client = new OpenAIClient({
|
|
446
|
-
* provider: "copilot",
|
|
447
|
-
* model: "gpt-4o",
|
|
448
|
-
* apiKey: "ghp_xxx",
|
|
449
|
-
* });
|
|
450
|
-
* ```
|
|
862
|
+
* OpenAIClient 类(中)/ OpenAIClient class for OpenAI & Copilot (EN).
|
|
451
863
|
*/
|
|
452
864
|
var OpenAIClient = class extends BaseAIClient {
|
|
453
865
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
@@ -455,30 +867,35 @@ var OpenAIClient = class extends BaseAIClient {
|
|
|
455
867
|
constructor(config) {
|
|
456
868
|
super({ chatHandler: async (params) => {
|
|
457
869
|
const req = buildOpenAIRequest(this.config, params);
|
|
458
|
-
|
|
870
|
+
if (!(this.config.stream ?? true)) {
|
|
871
|
+
const res = await fetch(req.url, {
|
|
872
|
+
method: req.method,
|
|
873
|
+
headers: req.headers,
|
|
874
|
+
body: req.body
|
|
875
|
+
});
|
|
876
|
+
if (!res.ok) {
|
|
877
|
+
const errText = await res.text();
|
|
878
|
+
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
879
|
+
}
|
|
880
|
+
return parseOpenAIResponse(await res.json());
|
|
881
|
+
}
|
|
882
|
+
const streamRes = await fetch(req.url, {
|
|
459
883
|
method: req.method,
|
|
460
884
|
headers: req.headers,
|
|
461
885
|
body: req.body
|
|
462
886
|
});
|
|
463
|
-
if (!
|
|
464
|
-
const errText = await
|
|
465
|
-
throw new Error(`AI API ${
|
|
887
|
+
if (!streamRes.ok) {
|
|
888
|
+
const errText = await streamRes.text();
|
|
889
|
+
throw new Error(`AI API ${streamRes.status}: ${errText.slice(0, 500)}`);
|
|
466
890
|
}
|
|
467
|
-
return parseOpenAIResponse(await
|
|
891
|
+
if ((streamRes.headers.get("content-type") ?? "").includes("application/json")) return parseOpenAIResponse(await streamRes.json());
|
|
892
|
+
return parseOpenAIStream(streamRes, 2e4);
|
|
468
893
|
} });
|
|
469
894
|
this.config = config;
|
|
470
895
|
}
|
|
471
896
|
};
|
|
472
897
|
/**
|
|
473
|
-
*
|
|
474
|
-
*
|
|
475
|
-
* 转换逻辑:
|
|
476
|
-
* - system prompt → `{ role: "system", content }` 消息
|
|
477
|
-
* - 工具定义 → `tools` 数组(function calling 格式)
|
|
478
|
-
* - 工具结果 → 拆分为多条 `{ role: "tool", tool_call_id }` 消息
|
|
479
|
-
* - AI 回复含工具调用 → `tool_calls` 字段
|
|
480
|
-
*
|
|
481
|
-
* 默认参数:temperature=0.3, max_tokens=8192, tool_choice="auto"
|
|
898
|
+
* 构建 OpenAI 请求(中)/ Build OpenAI chat request payload (EN).
|
|
482
899
|
*/
|
|
483
900
|
function buildOpenAIRequest(config, params) {
|
|
484
901
|
const baseURL = resolveBaseURL(config);
|
|
@@ -496,11 +913,16 @@ function buildOpenAIRequest(config, params) {
|
|
|
496
913
|
model: config.model,
|
|
497
914
|
messages: openaiMessages,
|
|
498
915
|
temperature: .3,
|
|
499
|
-
max_tokens:
|
|
916
|
+
max_tokens: 4096
|
|
500
917
|
};
|
|
918
|
+
if (config.stream ?? true) {
|
|
919
|
+
body.stream = true;
|
|
920
|
+
body.stream_options = { include_usage: true };
|
|
921
|
+
}
|
|
501
922
|
if (openaiTools && openaiTools.length > 0) {
|
|
502
923
|
body.tools = openaiTools;
|
|
503
924
|
body.tool_choice = "auto";
|
|
925
|
+
body.parallel_tool_calls = true;
|
|
504
926
|
}
|
|
505
927
|
return {
|
|
506
928
|
url: `${baseURL}/chat/completions`,
|
|
@@ -513,14 +935,7 @@ function buildOpenAIRequest(config, params) {
|
|
|
513
935
|
};
|
|
514
936
|
}
|
|
515
937
|
/**
|
|
516
|
-
*
|
|
517
|
-
*
|
|
518
|
-
* 解析要点:
|
|
519
|
-
* - 文本回复 → `choice.message.content`
|
|
520
|
-
* - 工具调用 → `choice.message.tool_calls`,arguments 为 JSON 字符串需 parse
|
|
521
|
-
* - Token 用量 → `usage.prompt_tokens` / `usage.completion_tokens`
|
|
522
|
-
*
|
|
523
|
-
* @throws 无有效 choice 时抛出 Error
|
|
938
|
+
* 解析 OpenAI 响应(中)/ Parse raw OpenAI response into AIChatResponse (EN).
|
|
524
939
|
*/
|
|
525
940
|
function parseOpenAIResponse(data) {
|
|
526
941
|
const d = data;
|
|
@@ -542,12 +957,7 @@ function parseOpenAIResponse(data) {
|
|
|
542
957
|
};
|
|
543
958
|
}
|
|
544
959
|
/**
|
|
545
|
-
*
|
|
546
|
-
*
|
|
547
|
-
* 三种特殊消息的处理:
|
|
548
|
-
* 1. tool 消息(工具结果)→ 每个结果拆分为单独的 `role: "tool"` 消息
|
|
549
|
-
* 2. assistant 含 toolCalls → 附带 `tool_calls` 字段
|
|
550
|
-
* 3. 其他消息 → 直接映射 role + content
|
|
960
|
+
* 消息转换(中)/ Convert unified messages to OpenAI format (EN).
|
|
551
961
|
*/
|
|
552
962
|
function convertMessages$1(systemPrompt, messages) {
|
|
553
963
|
const result = [{
|
|
@@ -577,26 +987,56 @@ function convertMessages$1(systemPrompt, messages) {
|
|
|
577
987
|
});
|
|
578
988
|
return result;
|
|
579
989
|
}
|
|
990
|
+
/**
 * Parse an OpenAI-style chat-completions SSE stream into the unified response shape.
 *
 * Text deltas are concatenated; tool-call deltas are merged per `index`
 * (argument fragments are appended, `id` / `name` are taken from whichever
 * fragment first carries them). The last `usage` object seen is reported.
 *
 * @param {Response} response - Streaming response; when it has no body it is parsed as a regular JSON response.
 * @param {number} [readTimeoutMs=20000] - Per-read timeout forwarded to consumeSSEJSON.
 * @returns {Promise<{text: string|undefined, toolCalls: Array<{id: string, name: string, input: any}>|undefined, usage: {inputTokens: number, outputTokens: number}|undefined}>}
 */
async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
	if (!response.body) return parseOpenAIResponse(await response.json());
	let text = "";
	const toolCallMap = new Map();
	let usage;
	await consumeSSEJSON(response, (event) => {
		const chunk = event;
		const delta = chunk.choices?.[0]?.delta;
		if (delta?.content) text += delta.content;
		for (const tc of delta?.tool_calls ?? []) {
			const idx = tc.index ?? 0;
			let entry = toolCallMap.get(idx);
			if (!entry) {
				entry = {
					id: "",
					name: "",
					arguments: ""
				};
				toolCallMap.set(idx, entry);
			}
			// id / name may arrive on a later fragment than the first one.
			if (tc.id && !entry.id) entry.id = tc.id;
			if (tc.function?.name && !entry.name) entry.name = tc.function.name;
			if (tc.function?.arguments) entry.arguments += tc.function.arguments;
		}
		if (chunk.usage) usage = {
			inputTokens: chunk.usage.prompt_tokens ?? 0,
			outputTokens: chunk.usage.completion_tokens ?? 0
		};
	}, {
		readTimeoutMs,
		stopOnDone: true
	});
	const toolCalls = [];
	const ordered = [...toolCallMap.entries()].sort((a, b) => a[0] - b[0]);
	for (const [, tc] of ordered) try {
		toolCalls.push({
			id: tc.id,
			name: tc.name,
			// A call without arguments streams an empty string; treat it as `{}`
			// (same fallback as the Anthropic stream parser) instead of dropping it.
			input: JSON.parse(tc.arguments || "{}")
		});
	} catch {
		// Arguments that are not valid JSON (e.g. truncated output): skip this call.
	}
	return {
		text: text || void 0,
		toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
		usage
	};
}
|
|
580
1035
|
|
|
581
1036
|
//#endregion
|
|
582
1037
|
//#region src/core/ai-client/anthropic.ts
|
|
583
1038
|
/**
|
|
584
|
-
*
|
|
585
|
-
*
|
|
586
|
-
* 封装完整的 Anthropic Messages API 调用流程:
|
|
587
|
-
* 1. buildAnthropicRequest() → 构建 HTTP 请求
|
|
588
|
-
* 2. fetch() → 发送请求
|
|
589
|
-
* 3. parseAnthropicResponse() → 解析响应为统一格式
|
|
590
|
-
*
|
|
591
|
-
* 使用示例:
|
|
592
|
-
* ```ts
|
|
593
|
-
* const client = new AnthropicClient({
|
|
594
|
-
* provider: "anthropic",
|
|
595
|
-
* model: "claude-sonnet-4-20250514",
|
|
596
|
-
* apiKey: "sk-ant-xxx",
|
|
597
|
-
* });
|
|
598
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
599
|
-
* ```
|
|
1039
|
+
* AnthropicClient 类(中)/ AnthropicClient class (EN).
|
|
600
1040
|
*/
|
|
601
1041
|
var AnthropicClient = class extends BaseAIClient {
|
|
602
1042
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
@@ -604,6 +1044,18 @@ var AnthropicClient = class extends BaseAIClient {
|
|
|
604
1044
|
constructor(config) {
|
|
605
1045
|
super({ chatHandler: async (params) => {
|
|
606
1046
|
const req = buildAnthropicRequest(this.config, params);
|
|
1047
|
+
if (!(this.config.stream ?? true)) {
|
|
1048
|
+
const res = await fetch(req.url, {
|
|
1049
|
+
method: req.method,
|
|
1050
|
+
headers: req.headers,
|
|
1051
|
+
body: req.body
|
|
1052
|
+
});
|
|
1053
|
+
if (!res.ok) {
|
|
1054
|
+
const errText = await res.text();
|
|
1055
|
+
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
1056
|
+
}
|
|
1057
|
+
return parseAnthropicResponse(await res.json());
|
|
1058
|
+
}
|
|
607
1059
|
const res = await fetch(req.url, {
|
|
608
1060
|
method: req.method,
|
|
609
1061
|
headers: req.headers,
|
|
@@ -613,22 +1065,14 @@ var AnthropicClient = class extends BaseAIClient {
|
|
|
613
1065
|
const errText = await res.text();
|
|
614
1066
|
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
615
1067
|
}
|
|
616
|
-
return parseAnthropicResponse(await res.json());
|
|
1068
|
+
if ((res.headers.get("content-type") ?? "").includes("application/json")) return parseAnthropicResponse(await res.json());
|
|
1069
|
+
return parseAnthropicStream(res);
|
|
617
1070
|
} });
|
|
618
1071
|
this.config = config;
|
|
619
1072
|
}
|
|
620
1073
|
};
|
|
621
1074
|
/**
|
|
622
|
-
*
|
|
623
|
-
*
|
|
624
|
-
* 关键格式差异(与 OpenAI 相比):
|
|
625
|
-
* - system prompt → body.system 字段(非消息数组元素)
|
|
626
|
-
* - 工具定义 → input_schema(而非 parameters)
|
|
627
|
-
* - 工具结果 → user 角色 + tool_result content block
|
|
628
|
-
* - AI 工具调用 → assistant 角色 + tool_use content block
|
|
629
|
-
*
|
|
630
|
-
* max_tokens 策略:opus 模型 16384,其他模型 8192。
|
|
631
|
-
* 认证头使用 `x-api-key`(而非 Authorization Bearer)。
|
|
1075
|
+
* 构建 Anthropic 请求(中)/ Build Anthropic Messages API request (EN).
|
|
632
1076
|
*/
|
|
633
1077
|
function buildAnthropicRequest(config, params) {
|
|
634
1078
|
const baseURL = resolveBaseURL(config);
|
|
@@ -645,6 +1089,7 @@ function buildAnthropicRequest(config, params) {
|
|
|
645
1089
|
system: systemPrompt,
|
|
646
1090
|
messages: anthropicMessages
|
|
647
1091
|
};
|
|
1092
|
+
if (config.stream ?? true) body.stream = true;
|
|
648
1093
|
if (anthropicTools && anthropicTools.length > 0) body.tools = anthropicTools;
|
|
649
1094
|
return {
|
|
650
1095
|
url: `${baseURL}/v1/messages`,
|
|
@@ -658,13 +1103,7 @@ function buildAnthropicRequest(config, params) {
|
|
|
658
1103
|
};
|
|
659
1104
|
}
|
|
660
1105
|
/**
|
|
661
|
-
*
|
|
662
|
-
*
|
|
663
|
-
* Anthropic 使用 content block 数组返回多种内容:
|
|
664
|
-
* - type="text" → 文本回复(可能多个,合并为一个字符串)
|
|
665
|
-
* - type="tool_use" → 工具调用(id + name + input)
|
|
666
|
-
*
|
|
667
|
-
* Token 用量字段名也不同:input_tokens / output_tokens(非 prompt_tokens)。
|
|
1106
|
+
* 解析 Anthropic 响应(中)/ Parse raw Anthropic response (EN).
|
|
668
1107
|
*/
|
|
669
1108
|
function parseAnthropicResponse(data) {
|
|
670
1109
|
const d = data;
|
|
@@ -684,12 +1123,7 @@ function parseAnthropicResponse(data) {
|
|
|
684
1123
|
};
|
|
685
1124
|
}
|
|
686
1125
|
/**
|
|
687
|
-
*
|
|
688
|
-
*
|
|
689
|
-
* 关键差异处理:
|
|
690
|
-
* 1. 过滤 system 消息(Anthropic 通过 body.system 传入)
|
|
691
|
-
* 2. tool 角色消息 → user 角色 + tool_result content block
|
|
692
|
-
* 3. assistant 含 toolCalls → text + tool_use content blocks
|
|
1126
|
+
* 消息格式转换(中)/ Convert unified messages to Anthropic format (EN).
|
|
693
1127
|
*/
|
|
694
1128
|
function convertMessages(messages) {
|
|
695
1129
|
return messages.filter((m) => m.role !== "system").map((m) => {
|
|
@@ -724,64 +1158,80 @@ function convertMessages(messages) {
|
|
|
724
1158
|
};
|
|
725
1159
|
});
|
|
726
1160
|
}
|
|
1161
|
+
/**
 * Parse an Anthropic Messages SSE stream into the unified response shape.
 *
 * Handles `message_start` (input token count), `content_block_start` /
 * `content_block_delta` / `content_block_stop` (text and tool_use blocks) and
 * `message_delta` (output token count). An `error` event stops consumption and
 * is reported as a thrown Error instead of being returned as a truncated reply.
 *
 * @param {Response} response - Streaming response; when it has no body it is parsed as a regular JSON response.
 * @returns {Promise<{text: string|undefined, toolCalls: Array<{id: string, name: string, input: any}>|undefined, usage: {inputTokens: number, outputTokens: number}|undefined}>}
 * @throws {Error} `AI API stream error (<type>): <message>` when the stream carries an `error` event.
 */
async function parseAnthropicStream(response) {
	if (!response.body) return parseAnthropicResponse(await response.json());
	let text = "";
	const toolCalls = [];
	let currentToolUse = null;
	let inputTokens = 0;
	let outputTokens = 0;
	// Captured here and thrown after consumption: errors thrown inside the
	// handler are swallowed by the SSE consumer.
	let streamError;
	await consumeSSEJSON(response, (event) => {
		switch (event.type) {
			case "message_start":
				inputTokens = event.message?.usage?.input_tokens ?? 0;
				break;
			case "content_block_start": {
				const block = event.content_block;
				if (block?.type === "tool_use") currentToolUse = {
					id: block.id ?? "",
					name: block.name ?? "",
					inputJson: ""
				};
				break;
			}
			case "content_block_delta": {
				const delta = event.delta;
				if (delta?.type === "text_delta") text += delta.text ?? "";
				else if (delta?.type === "input_json_delta" && currentToolUse) currentToolUse.inputJson += delta.partial_json ?? "";
				break;
			}
			case "content_block_stop":
				if (currentToolUse) {
					try {
						toolCalls.push({
							id: currentToolUse.id,
							name: currentToolUse.name,
							input: JSON.parse(currentToolUse.inputJson || "{}")
						});
					} catch {
						// Tool input that is not valid JSON: skip this call.
					}
					currentToolUse = null;
				}
				break;
			case "message_delta":
				outputTokens = event.usage?.output_tokens ?? 0;
				break;
			case "error":
				streamError = event.error ?? {};
				return false;
		}
	}, { stopOnDone: false });
	if (streamError) {
		const kind = streamError.type ?? "error";
		const detail = String(streamError.message ?? "unknown error").slice(0, 500);
		throw new Error(`AI API stream error (${kind}): ${detail}`);
	}
	return {
		text: text || void 0,
		toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
		usage: inputTokens > 0 || outputTokens > 0 ? {
			inputTokens,
			outputTokens
		} : void 0
	};
}
|
|
727
1217
|
|
|
728
1218
|
//#endregion
|
|
729
1219
|
//#region src/core/ai-client/deepseek.ts
|
|
730
1220
|
/**
|
|
731
|
-
* DeepSeek
|
|
732
|
-
*
|
|
733
|
-
* DeepSeek 使用 OpenAI 兼容的 Chat Completions API 格式,
|
|
734
|
-
* 因此直接继承 OpenAIClient,复用请求构建和响应解析逻辑。
|
|
735
|
-
*
|
|
736
|
-
* 差异点(相对于 OpenAI):
|
|
737
|
-
* - 端点:https://api.deepseek.com(Constants 中配置)
|
|
738
|
-
* - 模型:deepseek-chat(V3)、deepseek-reasoner(R1)等
|
|
739
|
-
* - 认证:Authorization: Bearer <API Key>(与 OpenAI 相同)
|
|
740
|
-
* - tool_calls 格式与 OpenAI 完全一致
|
|
741
|
-
*
|
|
742
|
-
* 继承关系:
|
|
743
|
-
* BaseAIClient(custom.ts)
|
|
744
|
-
* └── OpenAIClient(openai.ts)
|
|
745
|
-
* └── DeepSeekClient(本文件)— 可覆盖默认参数
|
|
1221
|
+
* DeepSeek 客户端封装(中)/ DeepSeek client wrapper (EN).
|
|
746
1222
|
*
|
|
747
|
-
*
|
|
748
|
-
*
|
|
749
|
-
* const client = new DeepSeekClient({
|
|
750
|
-
* provider: "deepseek",
|
|
751
|
-
* model: "deepseek-chat",
|
|
752
|
-
* apiKey: "sk-xxx",
|
|
753
|
-
* });
|
|
754
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
755
|
-
* ```
|
|
756
|
-
*
|
|
757
|
-
* 参考文档:
|
|
758
|
-
* - Tool Calls: https://api-docs.deepseek.com/zh-cn/guides/tool_calls
|
|
759
|
-
* - Chat API: https://api-docs.deepseek.com/zh-cn/api/create-chat-completion/
|
|
1223
|
+
* DeepSeek 与 OpenAI Chat Completions 兼容,直接复用 OpenAIClient。
|
|
1224
|
+
* DeepSeek is OpenAI-compatible, so it reuses OpenAIClient behavior.
|
|
760
1225
|
*/
|
|
761
1226
|
/**
|
|
762
|
-
* DeepSeek
|
|
763
|
-
*
|
|
764
|
-
* DeepSeek API 与 OpenAI Chat Completions API 完全兼容,
|
|
765
|
-
* 包括 tool_calls、function calling、消息格式等。
|
|
766
|
-
*
|
|
767
|
-
* 如需自定义 DeepSeek 特有行为(如 strict 模式、思考模式等),
|
|
768
|
-
* 可在此类中覆盖相关方法。
|
|
1227
|
+
* DeepSeek 客户端类(中)/ DeepSeek client class extending OpenAIClient (EN).
|
|
769
1228
|
*/
|
|
770
1229
|
// DeepSeek client: an empty subclass, so every behavior is inherited unchanged from OpenAIClient.
var DeepSeekClient = class extends OpenAIClient {};
|
|
771
1230
|
|
|
772
1231
|
//#endregion
|
|
773
1232
|
//#region src/core/ai-client/index.ts
|
|
774
1233
|
/**
|
|
775
|
-
* 创建 AI
|
|
776
|
-
*
|
|
777
|
-
* 根据 provider 自动创建对应的客户端类实例:
|
|
778
|
-
* - openai / copilot → new OpenAIClient(config)
|
|
779
|
-
* - anthropic → new AnthropicClient(config)
|
|
780
|
-
*
|
|
781
|
-
* 返回 AIClient 接口,调用 chat() 即可与 AI 对话。
|
|
782
|
-
*
|
|
783
|
-
* @param config - 包含 provider、model、apiKey 等配置
|
|
784
|
-
* @returns AIClient 实例(OpenAIClient 或 AnthropicClient)
|
|
1234
|
+
* 创建 AI 客户端(中)/ Create AI client by provider (EN).
|
|
785
1235
|
*/
|
|
786
1236
|
function createAIClient(config) {
|
|
787
1237
|
validateProvider(config.provider);
|
|
@@ -849,31 +1299,78 @@ var ToolRegistry = class {
|
|
|
849
1299
|
//#endregion
|
|
850
1300
|
//#region src/core/system-prompt.ts
|
|
851
1301
|
/**
 * Normalize caller-supplied extra instructions into a clean list of lines.
 *
 * Accepts a single value or an array. Each string entry is trimmed and
 * entries that end up empty are dropped. Entries that are not strings
 * (for example `null`, `undefined` or numbers coming from loosely typed
 * configuration) are ignored instead of raising a TypeError on `.trim()`.
 *
 * @param {string|string[]|null|undefined} input - One instruction or a list of instructions.
 * @returns {string[]} Trimmed, non-empty instruction strings in their original order.
 */
function normalizeExtraInstructions(input) {
	if (!input) return [];
	const candidates = Array.isArray(input) ? input : [input];
	return candidates
		// Guard first: only strings have a meaningful trimmed form.
		.filter((entry) => typeof entry === "string")
		.map((entry) => entry.trim())
		.filter(Boolean);
}
|
|
1308
|
+
/**
 * Build the system prompt that is sent to the model.
 *
 * The prompt body is English-only. Sections are appended in a fixed order
 * and joined with a blank line between them:
 *   1. the core rules, output contract and minimal example (always present),
 *   2. the tool list (only when at least one tool is supplied),
 *   3. the reasoning profile (only when `thinkingLevel` is truthy),
 *   4. the extra instructions (only when at least one survives normalization).
 *
 * @param {object} [params] - Prompt options.
 * @param {Array<{name: string, description: string}>} [params.tools] - Tools rendered as a bullet list.
 * @param {string} [params.thinkingLevel] - Value printed under "Reasoning Profile".
 * @param {string|string[]} [params.extraInstructions] - Additional instruction lines.
 * @returns {string} The assembled prompt text.
 */
function buildSystemPrompt(params = {}) {
	const corePrompt = [
		"You are AutoPilot, an AI agent controlling the current web page via tools.",
		"",
		"## Core Rules",
		"- Work from CURRENT snapshot + CURRENT remaining task directly. Do not restate the request.",
		"- Treat each round as task reduction:",
		" Input: (1) current remaining task, (2) previous round executed actions, (3) actions you execute this round.",
		" Output: new remaining task after removing this-round actions.",
		"- Use only visible targets from snapshot. Use #hashID as selector. Do not guess CSS selectors.",
		"- Batch independent visible actions in one round. Do not split one form into many rounds unnecessarily.",
		"- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
		"- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
		"- For dropdown/select, use dom action=select_option (or fill on select).",
		"- Do NOT interact with AutoPilot UI unless user explicitly asks.",
		"",
		"## Output Contract",
		"- Return tool calls for this round.",
		"- Also include one plain text line:",
		" REMAINING: <new remaining task after this round>",
		" or REMAINING: DONE",
		"",
		"## Minimal Example",
		"Task: click button -> type \"abc\" in input -> send",
		"Round1 execute: click button",
		"Remaining: type \"abc\" in input -> send",
		"Round2 execute: type \"abc\" in input",
		"Remaining: send",
		"Round3 execute: send",
		"Remaining: DONE"
	].join("\n");
	const sections = [corePrompt];

	// Tool list: one markdown bullet per registered tool.
	const toolDefs = params.tools ?? [];
	if (toolDefs.length > 0) {
		const bullets = toolDefs.map((tool) => `- **${tool.name}**: ${tool.description}`).join("\n");
		sections.push(`## Available Tools\n\n${bullets}\n\nUse tools when needed to complete the user's request.`);
	}

	// Reasoning profile is emitted only for a truthy level.
	if (params.thinkingLevel) {
		sections.push(`## Reasoning Profile\n- Thinking level: ${params.thinkingLevel}`);
	}

	// Extra instructions are normalized by the sibling helper before rendering.
	const extras = normalizeExtraInstructions(params.extraInstructions);
	if (extras.length > 0) {
		const extraBullets = extras.map((line) => `- ${line}`);
		sections.push(["## Extra Instructions", ...extraBullets].join("\n"));
	}

	return sections.join("\n\n");
}
|
|
865
1361
|
|
|
866
1362
|
//#endregion
|
|
867
|
-
//#region src/web/dom-tool.ts
|
|
1363
|
+
//#region src/web/tools/dom-tool.ts
|
|
868
1364
|
/**
|
|
869
1365
|
* DOM Tool — 基于 Web API 的 DOM 操作工具。
|
|
870
1366
|
*
|
|
871
1367
|
* 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
|
|
872
1368
|
* 运行环境:浏览器 Content Script。
|
|
873
1369
|
*
|
|
874
|
-
* 支持
|
|
1370
|
+
* 支持 12 种动作:
|
|
875
1371
|
* click — 点击元素
|
|
876
|
-
* fill —
|
|
1372
|
+
* fill — 填写可编辑控件(input/textarea/select/contenteditable)
|
|
1373
|
+
* select_option — 选择下拉框选项(value/label)
|
|
877
1374
|
* type — 逐字符模拟键入
|
|
878
1375
|
* focus — 聚焦元素
|
|
879
1376
|
* hover — 鼠标悬停(触发 mouseenter/mouseover)
|
|
@@ -910,7 +1407,7 @@ function queryElement(selector) {
|
|
|
910
1407
|
const el = document.querySelector(selector);
|
|
911
1408
|
if (!el) return `未找到匹配 "${selector}" 的元素`;
|
|
912
1409
|
return el;
|
|
913
|
-
} catch
|
|
1410
|
+
} catch {
|
|
914
1411
|
return `选择器语法错误: ${selector}`;
|
|
915
1412
|
}
|
|
916
1413
|
}
|
|
@@ -988,11 +1485,11 @@ function createDomTool() {
|
|
|
988
1485
|
name: "dom",
|
|
989
1486
|
description: [
|
|
990
1487
|
"Perform DOM operations on the current page.",
|
|
991
|
-
"Actions: click, fill, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
1488
|
+
"Actions: click, fill, select_option, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
992
1489
|
"Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
|
|
993
1490
|
].join(" "),
|
|
994
1491
|
schema: Type.Object({
|
|
995
|
-
action: Type.String({ description: "DOM action: click | fill | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
|
|
1492
|
+
action: Type.String({ description: "DOM action: click | fill | select_option | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
|
|
996
1493
|
selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
|
|
997
1494
|
value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
|
|
998
1495
|
key: Type.Optional(Type.String({ description: "Key name for press action (e.g. Enter, Escape, Tab, ArrowDown, ArrowUp, Backspace, Delete, Space)" })),
|
|
@@ -1046,6 +1543,15 @@ function createDomTool() {
|
|
|
1046
1543
|
try {
|
|
1047
1544
|
switch (action) {
|
|
1048
1545
|
case "click":
|
|
1546
|
+
if (el instanceof HTMLOptionElement) {
|
|
1547
|
+
const parent = el.parentElement;
|
|
1548
|
+
if (parent instanceof HTMLSelectElement) {
|
|
1549
|
+
parent.focus();
|
|
1550
|
+
parent.value = el.value;
|
|
1551
|
+
dispatchInputEvents(parent);
|
|
1552
|
+
return { content: `已选择 ${describeElement(parent)} 的选项 "${el.value}"` };
|
|
1553
|
+
}
|
|
1554
|
+
}
|
|
1049
1555
|
if (el instanceof HTMLElement) {
|
|
1050
1556
|
el.focus();
|
|
1051
1557
|
el.click();
|
|
@@ -1091,6 +1597,24 @@ function createDomTool() {
|
|
|
1091
1597
|
el.focus();
|
|
1092
1598
|
el.value = value;
|
|
1093
1599
|
dispatchInputEvents(el);
|
|
1600
|
+
} else if (el instanceof HTMLSelectElement) {
|
|
1601
|
+
el.focus();
|
|
1602
|
+
let matched = false;
|
|
1603
|
+
for (const option of Array.from(el.options)) if (option.value === value) {
|
|
1604
|
+
el.value = option.value;
|
|
1605
|
+
matched = true;
|
|
1606
|
+
break;
|
|
1607
|
+
}
|
|
1608
|
+
if (!matched) {
|
|
1609
|
+
const normalized = value.trim().toLowerCase();
|
|
1610
|
+
for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
|
|
1611
|
+
el.value = option.value;
|
|
1612
|
+
matched = true;
|
|
1613
|
+
break;
|
|
1614
|
+
}
|
|
1615
|
+
}
|
|
1616
|
+
if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
|
|
1617
|
+
dispatchInputEvents(el);
|
|
1094
1618
|
} else if (el instanceof HTMLElement && el.isContentEditable) {
|
|
1095
1619
|
el.focus();
|
|
1096
1620
|
el.textContent = value;
|
|
@@ -1098,6 +1622,29 @@ function createDomTool() {
|
|
|
1098
1622
|
} else return { content: `"${selector}" 不是可编辑元素` };
|
|
1099
1623
|
return { content: `已填写 ${describeElement(el)}: "${value}"` };
|
|
1100
1624
|
}
|
|
1625
|
+
case "select_option": {
|
|
1626
|
+
const value = params.value;
|
|
1627
|
+
if (value === void 0) return { content: "缺少 value 参数" };
|
|
1628
|
+
if (!(el instanceof HTMLSelectElement)) return { content: `"${selector}" 不是下拉框元素` };
|
|
1629
|
+
el.focus();
|
|
1630
|
+
let matched = false;
|
|
1631
|
+
for (const option of Array.from(el.options)) if (option.value === value) {
|
|
1632
|
+
el.value = option.value;
|
|
1633
|
+
matched = true;
|
|
1634
|
+
break;
|
|
1635
|
+
}
|
|
1636
|
+
if (!matched) {
|
|
1637
|
+
const normalized = value.trim().toLowerCase();
|
|
1638
|
+
for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
|
|
1639
|
+
el.value = option.value;
|
|
1640
|
+
matched = true;
|
|
1641
|
+
break;
|
|
1642
|
+
}
|
|
1643
|
+
}
|
|
1644
|
+
if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
|
|
1645
|
+
dispatchInputEvents(el);
|
|
1646
|
+
return { content: `已选择 ${describeElement(el)}: "${el.value}"` };
|
|
1647
|
+
}
|
|
1101
1648
|
case "type": {
|
|
1102
1649
|
const value = params.value;
|
|
1103
1650
|
if (value === void 0) return { content: "缺少 value 参数" };
|
|
@@ -1166,7 +1713,7 @@ function createDomTool() {
|
|
|
1166
1713
|
}
|
|
1167
1714
|
|
|
1168
1715
|
//#endregion
|
|
1169
|
-
//#region src/web/page-info-tool.ts
|
|
1716
|
+
//#region src/web/tools/page-info-tool.ts
|
|
1170
1717
|
/**
|
|
1171
1718
|
* Page Info Tool — 基于 Web API 的页面信息获取工具。
|
|
1172
1719
|
*
|
|
@@ -1206,6 +1753,11 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1206
1753
|
const maxDepth = opts.maxDepth ?? 6;
|
|
1207
1754
|
const viewportOnly = opts.viewportOnly ?? true;
|
|
1208
1755
|
const pruneLayout = opts.pruneLayout ?? true;
|
|
1756
|
+
const maxNodes = opts.maxNodes ?? 220;
|
|
1757
|
+
const maxChildren = opts.maxChildren ?? 25;
|
|
1758
|
+
const maxTextLength = opts.maxTextLength ?? 40;
|
|
1759
|
+
let emittedNodes = 0;
|
|
1760
|
+
let truncatedByNodeBudget = false;
|
|
1209
1761
|
const refStore = opts.refStore;
|
|
1210
1762
|
const SKIP_TAGS = new Set([
|
|
1211
1763
|
"SCRIPT",
|
|
@@ -1247,14 +1799,18 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1247
1799
|
"title",
|
|
1248
1800
|
"for",
|
|
1249
1801
|
"action",
|
|
1250
|
-
"method"
|
|
1251
|
-
"target",
|
|
1252
|
-
"min",
|
|
1253
|
-
"max",
|
|
1254
|
-
"pattern",
|
|
1255
|
-
"maxlength",
|
|
1256
|
-
"tabindex"
|
|
1802
|
+
"method"
|
|
1257
1803
|
];
|
|
1804
|
+
const INTERACTIVE_TAGS = new Set([
|
|
1805
|
+
"A",
|
|
1806
|
+
"BUTTON",
|
|
1807
|
+
"INPUT",
|
|
1808
|
+
"TEXTAREA",
|
|
1809
|
+
"SELECT",
|
|
1810
|
+
"OPTION",
|
|
1811
|
+
"LABEL",
|
|
1812
|
+
"SUMMARY"
|
|
1813
|
+
]);
|
|
1258
1814
|
/** 布尔状态属性 — 只在存在时输出(无值),如 disabled、checked */
|
|
1259
1815
|
const BOOLEAN_ATTRS = [
|
|
1260
1816
|
"disabled",
|
|
@@ -1262,13 +1818,8 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1262
1818
|
"readonly",
|
|
1263
1819
|
"required",
|
|
1264
1820
|
"selected",
|
|
1265
|
-
"hidden"
|
|
1266
|
-
"multiple",
|
|
1267
|
-
"autofocus",
|
|
1268
|
-
"open"
|
|
1821
|
+
"hidden"
|
|
1269
1822
|
];
|
|
1270
|
-
/** 内联事件属性前缀 */
|
|
1271
|
-
const EVENT_PREFIX = "on";
|
|
1272
1823
|
/**
|
|
1273
1824
|
* 计算元素在父节点中同标签兄弟里的序号(1-based,XPath 规范)。
|
|
1274
1825
|
* 如果同标签兄弟只有一个,返回空字符串(无需索引消歧)。
|
|
@@ -1311,9 +1862,22 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1311
1862
|
if (directText) return false;
|
|
1312
1863
|
return true;
|
|
1313
1864
|
}
|
|
1865
|
+
/**
 * Decide whether an element counts as interactive.
 *
 * An element qualifies when its tag name is in INTERACTIVE_TAGS, or when it
 * carries any of the attributes `onclick`, `role`, `tabindex` or
 * `aria-label` (presence only; the attribute value is not inspected).
 *
 * @param {Element} el - Element to classify.
 * @returns {boolean} True when the element is considered interactive.
 */
function isInteractiveElement(el) {
	const hintAttributes = ["onclick", "role", "tabindex", "aria-label"];
	return INTERACTIVE_TAGS.has(el.tagName) || hintAttributes.some((name) => el.hasAttribute(name));
}
|
|
1314
1873
|
function walk(el, depth, parentPath) {
|
|
1874
|
+
if (emittedNodes >= maxNodes) {
|
|
1875
|
+
truncatedByNodeBudget = true;
|
|
1876
|
+
return "";
|
|
1877
|
+
}
|
|
1315
1878
|
if (depth > maxDepth) return "";
|
|
1316
1879
|
if (SKIP_TAGS.has(el.tagName)) return "";
|
|
1880
|
+
if (el.hasAttribute("data-autopilot-ignore")) return "";
|
|
1317
1881
|
const style = window.getComputedStyle(el);
|
|
1318
1882
|
if (style.display === "none" || style.visibility === "hidden") return "";
|
|
1319
1883
|
if (!isInViewport(el, depth)) return "";
|
|
@@ -1325,22 +1889,19 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1325
1889
|
if (elId) attrs.push(`id="${elId}"`);
|
|
1326
1890
|
const className = el.getAttribute("class")?.trim();
|
|
1327
1891
|
if (className) {
|
|
1328
|
-
const
|
|
1329
|
-
if (
|
|
1892
|
+
const cls = className.split(/\s+/).find((c) => c && !c.startsWith("data-v-") && c.length < 25 && !/^[a-z]{1,2}\d|^_|^css-/.test(c));
|
|
1893
|
+
if (cls) attrs.push(`class="${cls}"`);
|
|
1330
1894
|
}
|
|
1331
1895
|
for (const attr of INTERACTIVE_ATTRS) {
|
|
1332
1896
|
const val = el.getAttribute(attr);
|
|
1333
1897
|
if (val) attrs.push(`${attr}="${val}"`);
|
|
1334
1898
|
}
|
|
1335
1899
|
for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
if (
|
|
1339
|
-
const dataAttrs = [];
|
|
1340
|
-
for (const attrObj of Array.from(el.attributes)) if (attrObj.name.startsWith("data-") && !attrObj.name.match(/^data-v-/) && dataAttrs.length < 2) dataAttrs.push(`${attrObj.name}="${attrObj.value.slice(0, 30)}"`);
|
|
1341
|
-
if (dataAttrs.length > 0) attrs.push(...dataAttrs);
|
|
1900
|
+
if (el.hasAttribute("onclick")) attrs.push("onclick");
|
|
1901
|
+
const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
|
|
1902
|
+
if (testId) attrs.push(`data-testid="${testId.slice(0, 25)}"`);
|
|
1342
1903
|
if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.value) {
|
|
1343
|
-
const currentVal = el.value.slice(0,
|
|
1904
|
+
const currentVal = el.value.slice(0, 40);
|
|
1344
1905
|
if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
|
|
1345
1906
|
}
|
|
1346
1907
|
let directText = "";
|
|
@@ -1353,28 +1914,45 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1353
1914
|
}
|
|
1354
1915
|
directText = directText.trim();
|
|
1355
1916
|
if (isEmptyLayoutContainer(el, directText)) {
|
|
1917
|
+
const allChildren = Array.from(el.children);
|
|
1918
|
+
const interactiveChildren = allChildren.filter(isInteractiveElement);
|
|
1919
|
+
const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
|
|
1920
|
+
const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
|
|
1921
|
+
const selectedChildren = orderedChildren.slice(0, maxChildren);
|
|
1922
|
+
const omittedChildren = orderedChildren.length - selectedChildren.length;
|
|
1356
1923
|
const childLines = [];
|
|
1357
|
-
for (let i = 0; i <
|
|
1358
|
-
const childResult = walk(
|
|
1924
|
+
for (let i = 0; i < selectedChildren.length; i++) {
|
|
1925
|
+
const childResult = walk(selectedChildren[i], depth, currentPath);
|
|
1359
1926
|
if (childResult) childLines.push(childResult);
|
|
1360
1927
|
}
|
|
1928
|
+
if (omittedChildren > 0) childLines.push(`${" ".repeat(depth)}... (${omittedChildren} children omitted)`);
|
|
1361
1929
|
return childLines.join("\n");
|
|
1362
1930
|
}
|
|
1363
1931
|
let line = `${indent}[${tag}]`;
|
|
1364
|
-
if (directText) line += ` "${directText.slice(0,
|
|
1932
|
+
if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
|
|
1365
1933
|
if (attrs.length) line += ` ${attrs.join(" ")}`;
|
|
1366
1934
|
if (refStore) {
|
|
1367
1935
|
const hashId = refStore.set(el, currentPath);
|
|
1368
1936
|
line += ` #${hashId}`;
|
|
1369
1937
|
} else line += ` ref="${currentPath}"`;
|
|
1370
1938
|
const lines = [line];
|
|
1371
|
-
|
|
1372
|
-
|
|
1939
|
+
emittedNodes++;
|
|
1940
|
+
const allChildren = Array.from(el.children);
|
|
1941
|
+
const interactiveChildren = allChildren.filter(isInteractiveElement);
|
|
1942
|
+
const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
|
|
1943
|
+
const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
|
|
1944
|
+
const selectedChildren = orderedChildren.slice(0, maxChildren);
|
|
1945
|
+
const omittedChildren = orderedChildren.length - selectedChildren.length;
|
|
1946
|
+
for (let i = 0; i < selectedChildren.length; i++) {
|
|
1947
|
+
const childResult = walk(selectedChildren[i], depth + 1, currentPath);
|
|
1373
1948
|
if (childResult) lines.push(childResult);
|
|
1374
1949
|
}
|
|
1950
|
+
if (omittedChildren > 0) lines.push(`${indent} ... (${omittedChildren} children omitted)`);
|
|
1375
1951
|
return lines.join("\n");
|
|
1376
1952
|
}
|
|
1377
|
-
|
|
1953
|
+
const output = walk(root, 0, "") || "(空页面)";
|
|
1954
|
+
if (!truncatedByNodeBudget) return output;
|
|
1955
|
+
return `${output}\n... (snapshot truncated: maxNodes=${maxNodes})`;
|
|
1378
1956
|
}
|
|
1379
1957
|
/**
|
|
1380
1958
|
* 查询所有匹配元素并返回摘要信息(标签、文本、关键属性)。
|
|
@@ -1395,7 +1973,7 @@ function queryAllElements(selector, limit = 20) {
|
|
|
1395
1973
|
}
|
|
1396
1974
|
if (elements.length > limit) results.push(` ...还有 ${elements.length - limit} 个元素`);
|
|
1397
1975
|
return results.join("\n");
|
|
1398
|
-
} catch
|
|
1976
|
+
} catch {
|
|
1399
1977
|
return `选择器语法错误: ${selector}`;
|
|
1400
1978
|
}
|
|
1401
1979
|
}
|
|
@@ -1412,7 +1990,10 @@ function createPageInfoTool() {
|
|
|
1412
1990
|
selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
|
|
1413
1991
|
maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
|
|
1414
1992
|
viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
|
|
1415
|
-
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" }))
|
|
1993
|
+
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" })),
|
|
1994
|
+
maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 220)" })),
|
|
1995
|
+
maxChildren: Type.Optional(Type.Number({ description: "Maximum children per element (default: 25)" })),
|
|
1996
|
+
maxTextLength: Type.Optional(Type.Number({ description: "Maximum text length per node (default: 40)" }))
|
|
1416
1997
|
}),
|
|
1417
1998
|
execute: async (params) => {
|
|
1418
1999
|
const action = params.action;
|
|
@@ -1436,10 +2017,16 @@ function createPageInfoTool() {
|
|
|
1436
2017
|
const maxDepth = params.maxDepth ?? 6;
|
|
1437
2018
|
const viewportOnly = params.viewportOnly ?? true;
|
|
1438
2019
|
const pruneLayout = params.pruneLayout ?? true;
|
|
2020
|
+
const maxNodes = params.maxNodes ?? 220;
|
|
2021
|
+
const maxChildren = params.maxChildren ?? 25;
|
|
2022
|
+
const maxTextLength = params.maxTextLength ?? 40;
|
|
1439
2023
|
return { content: generateSnapshot(document.body, {
|
|
1440
2024
|
maxDepth,
|
|
1441
2025
|
viewportOnly,
|
|
1442
2026
|
pruneLayout,
|
|
2027
|
+
maxNodes,
|
|
2028
|
+
maxChildren,
|
|
2029
|
+
maxTextLength,
|
|
1443
2030
|
refStore: getActiveRefStore()
|
|
1444
2031
|
}) };
|
|
1445
2032
|
}
|
|
@@ -1464,7 +2051,7 @@ function createPageInfoTool() {
|
|
|
1464
2051
|
}
|
|
1465
2052
|
|
|
1466
2053
|
//#endregion
|
|
1467
|
-
//#region src/web/navigate-tool.ts
|
|
2054
|
+
//#region src/web/tools/navigate-tool.ts
|
|
1468
2055
|
/**
|
|
1469
2056
|
* Navigate Tool — 基于 Web API 的页面导航工具。
|
|
1470
2057
|
*
|
|
@@ -1544,7 +2131,7 @@ function createNavigateTool() {
|
|
|
1544
2131
|
}
|
|
1545
2132
|
|
|
1546
2133
|
//#endregion
|
|
1547
|
-
//#region src/web/wait-tool.ts
|
|
2134
|
+
//#region src/web/tools/wait-tool.ts
|
|
1548
2135
|
/**
|
|
1549
2136
|
* Wait Tool — 基于 MutationObserver 的元素等待工具。
|
|
1550
2137
|
*
|
|
@@ -1714,7 +2301,7 @@ function createWaitTool() {
|
|
|
1714
2301
|
}
|
|
1715
2302
|
|
|
1716
2303
|
//#endregion
|
|
1717
|
-
//#region src/web/evaluate-tool.ts
|
|
2304
|
+
//#region src/web/tools/evaluate-tool.ts
|
|
1718
2305
|
/**
|
|
1719
2306
|
* Evaluate Tool — 在页面上下文中执行任意 JavaScript 表达式。
|
|
1720
2307
|
*
|
|
@@ -1734,7 +2321,7 @@ function createWaitTool() {
|
|
|
1734
2321
|
function safeEvaluate(expression) {
|
|
1735
2322
|
try {
|
|
1736
2323
|
return { result: new Function(`"use strict"; return (${expression});`)() };
|
|
1737
|
-
} catch
|
|
2324
|
+
} catch {
|
|
1738
2325
|
try {
|
|
1739
2326
|
return { result: new Function(`"use strict"; ${expression}`)() };
|
|
1740
2327
|
} catch (err2) {
|
|
@@ -2000,6 +2587,7 @@ var WebAgent = class {
|
|
|
2000
2587
|
provider;
|
|
2001
2588
|
model;
|
|
2002
2589
|
baseURL;
|
|
2590
|
+
stream;
|
|
2003
2591
|
dryRun;
|
|
2004
2592
|
maxRounds;
|
|
2005
2593
|
customSystemPrompt;
|
|
@@ -2021,8 +2609,9 @@ var WebAgent = class {
|
|
|
2021
2609
|
this.provider = options.provider ?? "copilot";
|
|
2022
2610
|
this.model = options.model ?? "gpt-4o";
|
|
2023
2611
|
this.baseURL = options.baseURL;
|
|
2612
|
+
this.stream = options.stream ?? true;
|
|
2024
2613
|
this.dryRun = options.dryRun ?? false;
|
|
2025
|
-
this.maxRounds = options.maxRounds ??
|
|
2614
|
+
this.maxRounds = options.maxRounds ?? 40;
|
|
2026
2615
|
this.customSystemPrompt = options.systemPrompt;
|
|
2027
2616
|
this.memory = options.memory ?? false;
|
|
2028
2617
|
this.autoSnapshot = options.autoSnapshot ?? true;
|
|
@@ -2065,6 +2654,14 @@ var WebAgent = class {
|
|
|
2065
2654
|
/** Replace the model identifier stored on this agent instance. */
setModel(model) {
|
|
2066
2655
|
this.model = model;
|
|
2067
2656
|
}
|
|
2657
|
+
/** Set whether streaming output (SSE) is enabled. */
|
|
2658
|
+
setStream(enabled) {
|
|
2659
|
+
this.stream = enabled;
|
|
2660
|
+
}
|
|
2661
|
+
/** Get the current state of the streaming-output switch. */
|
|
2662
|
+
getStream() {
|
|
2663
|
+
return this.stream;
|
|
2664
|
+
}
|
|
2068
2665
|
/** 切换干运行模式 */
|
|
2069
2666
|
setDryRun(enabled) {
|
|
2070
2667
|
this.dryRun = enabled;
|
|
@@ -2116,14 +2713,19 @@ var WebAgent = class {
|
|
|
2116
2713
|
let systemPrompt = this.customSystemPrompt ?? buildSystemPrompt({ tools: this.registry.getDefinitions() });
|
|
2117
2714
|
const refStore = new RefStore(globalThis.location?.href);
|
|
2118
2715
|
setActiveRefStore(refStore);
|
|
2119
|
-
|
|
2716
|
+
let initialSnapshot;
|
|
2717
|
+
try {
|
|
2120
2718
|
const snapshot = generateSnapshot(document.body, {
|
|
2121
2719
|
maxDepth: 8,
|
|
2720
|
+
viewportOnly: false,
|
|
2721
|
+
maxNodes: 500,
|
|
2722
|
+
maxChildren: 30,
|
|
2122
2723
|
...this.snapshotOptions,
|
|
2123
2724
|
refStore
|
|
2124
2725
|
});
|
|
2125
|
-
|
|
2126
|
-
|
|
2726
|
+
initialSnapshot = snapshot;
|
|
2727
|
+
if (this.autoSnapshot) this.callbacks.onSnapshot?.(snapshot);
|
|
2728
|
+
systemPrompt += wrapSnapshot(`\n\n## DOM Snapshot\n\`\`\`\n${snapshot}\n\`\`\``);
|
|
2127
2729
|
} catch {}
|
|
2128
2730
|
const wrappedCallbacks = {
|
|
2129
2731
|
...this.callbacks,
|
|
@@ -2138,6 +2740,7 @@ var WebAgent = class {
|
|
|
2138
2740
|
registry: this.registry,
|
|
2139
2741
|
systemPrompt,
|
|
2140
2742
|
message,
|
|
2743
|
+
initialSnapshot,
|
|
2141
2744
|
history: this.memory ? this.history : void 0,
|
|
2142
2745
|
dryRun: this.dryRun,
|
|
2143
2746
|
maxRounds: this.maxRounds,
|
|
@@ -2159,7 +2762,8 @@ var WebAgent = class {
|
|
|
2159
2762
|
provider: this.provider,
|
|
2160
2763
|
model: this.model,
|
|
2161
2764
|
apiKey: this.token,
|
|
2162
|
-
baseURL: this.baseURL
|
|
2765
|
+
baseURL: this.baseURL,
|
|
2766
|
+
stream: this.stream
|
|
2163
2767
|
});
|
|
2164
2768
|
}
|
|
2165
2769
|
};
|