agentpage 0.0.12 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -4
- package/dist/index.d.mts +36 -10
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +946 -402
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -6,9 +6,11 @@ import { Type } from "@sinclair/typebox";
|
|
|
6
6
|
*
|
|
7
7
|
* 统一集中在该文件,避免在主循环中散落“魔法数字”。
|
|
8
8
|
*/
|
|
9
|
-
const DEFAULT_MAX_ROUNDS =
|
|
9
|
+
const DEFAULT_MAX_ROUNDS = 40;
|
|
10
10
|
const DEFAULT_RECOVERY_WAIT_MS = 100;
|
|
11
|
-
const DEFAULT_ACTION_RECOVERY_ROUNDS =
|
|
11
|
+
const DEFAULT_ACTION_RECOVERY_ROUNDS = 2;
|
|
12
|
+
const DEFAULT_NOT_FOUND_RETRY_ROUNDS = 2;
|
|
13
|
+
const DEFAULT_NOT_FOUND_RETRY_WAIT_MS = 2e3;
|
|
12
14
|
/** 快照起始标记 — 用于在消息中识别快照边界 */
|
|
13
15
|
const SNAPSHOT_START = "<!-- SNAPSHOT_START -->";
|
|
14
16
|
/** 快照结束标记 */
|
|
@@ -18,15 +20,15 @@ const SNAPSHOT_OUTDATED = "[此快照已过期,请参考对话中最新的快
|
|
|
18
20
|
|
|
19
21
|
//#endregion
|
|
20
22
|
//#region src/core/agent-loop/helpers.ts
|
|
21
|
-
/**
|
|
23
|
+
/**
 * Pause asynchronously for a number of milliseconds.
 *
 * @param {number} ms - Delay passed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function sleep$1(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
25
|
-
/**
|
|
27
|
+
/**
 * Normalize tool content to a string.
 *
 * Strings pass through untouched; anything else is pretty-printed as JSON
 * with a 2-space indent.
 *
 * @param {unknown} content - Raw `content` field of a tool result.
 * @returns {string} Always a string, so callers can safely chain string methods.
 */
function toContentString(content) {
  if (typeof content === "string") return content;
  // JSON.stringify yields `undefined` (not a string) for undefined, functions
  // and symbols; fall back to "" so the return type really is a string.
  return JSON.stringify(content, null, 2) ?? "";
}
|
|
29
|
-
/**
|
|
31
|
+
/** 元素不存在判定(中)/ Detect element-not-found failure (EN). */
|
|
30
32
|
function isElementNotFoundResult(result) {
|
|
31
33
|
const details = result.details;
|
|
32
34
|
if (details && typeof details === "object") {
|
|
@@ -35,15 +37,14 @@ function isElementNotFoundResult(result) {
|
|
|
35
37
|
const content = toContentString(result.content);
|
|
36
38
|
return content.includes("未找到") && content.includes("元素");
|
|
37
39
|
}
|
|
38
|
-
/**
|
|
40
|
+
/**
 * Build a stable identity key for a tool call.
 *
 * Object properties are serialized in sorted key order, so two calls carrying
 * the same parameters produce the same key regardless of the order in which
 * the properties were written. Array element order is preserved.
 *
 * @param {string} name - Tool name.
 * @param {unknown} input - Tool input payload.
 * @returns {string} Key of the form `<name>:<canonical JSON of input>`.
 */
function buildToolCallKey(name, input) {
  const byKey = ([left], [right]) => {
    if (left < right) return -1;
    return left > right ? 1 : 0;
  };
  const canonical = JSON.stringify(input, (_key, value) => {
    // Arrays and primitives are serialized as-is; only plain key/value
    // containers are re-created with their keys in sorted order.
    if (!value || typeof value !== "object" || Array.isArray(value)) return value;
    return Object.fromEntries(Object.entries(value).sort(byKey));
  });
  return `${name}:${canonical}`;
}
|
|
42
44
|
/**
|
|
43
|
-
*
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* - 最后回退默认值
|
|
45
|
+
* 解析恢复等待时长(中)/ Resolve recovery wait duration (EN).
|
|
46
|
+
* 优先级:waitMs > waitSeconds > 默认值。
|
|
47
|
+
* Priority: waitMs > waitSeconds > default value.
|
|
47
48
|
*/
|
|
48
49
|
function resolveRecoveryWaitMs(input) {
|
|
49
50
|
if (!input || typeof input !== "object") return DEFAULT_RECOVERY_WAIT_MS;
|
|
@@ -54,74 +55,93 @@ function resolveRecoveryWaitMs(input) {
|
|
|
54
55
|
if (typeof waitSeconds === "number" && Number.isFinite(waitSeconds)) return Math.max(0, Math.floor(waitSeconds * 1e3));
|
|
55
56
|
return DEFAULT_RECOVERY_WAIT_MS;
|
|
56
57
|
}
|
|
57
|
-
/**
|
|
58
|
-
function formatToolInputBrief(input) {
|
|
59
|
-
if (!input || typeof input !== "object") return "";
|
|
60
|
-
const params = input;
|
|
61
|
-
const parts = [];
|
|
62
|
-
for (const key of [
|
|
63
|
-
"action",
|
|
64
|
-
"selector",
|
|
65
|
-
"waitMs",
|
|
66
|
-
"waitSeconds",
|
|
67
|
-
"url",
|
|
68
|
-
"text"
|
|
69
|
-
]) {
|
|
70
|
-
const value = params[key];
|
|
71
|
-
if (value === void 0 || value === null) continue;
|
|
72
|
-
if (typeof value === "string") parts.push(`${key}=${JSON.stringify(value).slice(0, 80)}`);
|
|
73
|
-
else if (typeof value === "number" || typeof value === "boolean") parts.push(`${key}=${String(value)}`);
|
|
74
|
-
}
|
|
75
|
-
if (parts.length === 0) return "";
|
|
76
|
-
return ` (${parts.join(", ")})`;
|
|
77
|
-
}
|
|
78
|
-
/** 从工具参数中读取 action。 */
|
|
58
|
+
/**
 * Read the `action` field from a tool input.
 *
 * @param {unknown} input - Tool input payload.
 * @returns {string | undefined} The action when `input` is an object whose
 *   `action` property is a string; otherwise `undefined`.
 */
function getToolAction(input) {
  const isObjectLike = Boolean(input) && typeof input === "object";
  if (!isObjectLike) return void 0;
  const { action } = input;
  if (typeof action !== "string") return void 0;
  return action;
}
|
|
84
|
-
/**
|
|
64
|
+
/**
 * Tell whether a tool result is flagged as an error.
 *
 * @param {{ details?: unknown }} result - Tool result to inspect.
 * @returns {boolean} `true` only when `result.details` is an object with a
 *   truthy `error` property.
 */
function hasToolError(result) {
  const { details } = result;
  if (!details || typeof details !== "object") return false;
  return Boolean(details.error);
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
68
|
+
|
|
69
|
+
//#endregion
|
|
70
|
+
//#region src/core/agent-loop/snapshot.ts
|
|
71
|
+
/**
 * Read the current page snapshot through the `page_info` tool.
 *
 * Every option falls back to a default when it is null or undefined:
 * maxDepth 8, viewportOnly false, pruneLayout true, maxNodes 500,
 * maxChildren 30, maxTextLength 40.
 *
 * @param {{ dispatch: Function }} registry - Tool registry used to dispatch `page_info`.
 * @param {object} [options] - Optional overrides for the snapshot parameters.
 * @returns {Promise<string>} The snapshot content normalized by `toContentString`.
 */
async function readPageSnapshot(registry, options) {
  const request = {
    action: "snapshot",
    maxDepth: options?.maxDepth ?? 8,
    viewportOnly: options?.viewportOnly ?? false,
    pruneLayout: options?.pruneLayout ?? true,
    maxNodes: options?.maxNodes ?? 500,
    maxChildren: options?.maxChildren ?? 30,
    maxTextLength: options?.maxTextLength ?? 40
  };
  const response = await registry.dispatch("page_info", request);
  return toContentString(response.content);
}
|
|
100
|
-
/**
|
|
88
|
+
/**
 * Surround a snapshot with the start/end boundary markers, one per line.
 *
 * @param {string} snapshot - Snapshot text to wrap.
 * @returns {string} `SNAPSHOT_START`, the snapshot and `SNAPSHOT_END` joined by newlines.
 */
function wrapSnapshot(snapshot) {
  const opening = `${SNAPSHOT_START}\n`;
  const closing = `\n${SNAPSHOT_END}`;
  return opening.concat(snapshot, closing);
}
|
|
92
|
+
/**
 * Escape regular-expression metacharacters so a string can be embedded
 * literally in a `RegExp` source.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegex(str) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (character) => `\\${character}`);
}
|
|
104
|
-
/**
|
|
96
|
+
/** 快照块匹配正则(中)/ Regex for snapshot blocks (EN). */
|
|
105
97
|
const SNAPSHOT_REGEX = new RegExp(`${escapeRegex(SNAPSHOT_START)}[\\s\\S]*?${escapeRegex(SNAPSHOT_END)}`, "g");
|
|
106
|
-
/**
|
|
107
|
-
function wrapSnapshot(snapshot) {
|
|
108
|
-
return `${SNAPSHOT_START}\n${snapshot}\n${SNAPSHOT_END}`;
|
|
109
|
-
}
|
|
110
|
-
/** 检测文本中是否包含快照标记。 */
|
|
98
|
+
/**
 * Check whether a text carries a snapshot start marker.
 *
 * @param {string} text - Text to scan.
 * @returns {boolean} `true` when `SNAPSHOT_START` occurs anywhere in `text`.
 */
function containsSnapshot(text) {
  const markerIndex = text.indexOf(SNAPSHOT_START);
  return markerIndex !== -1;
}
|
|
114
102
|
/**
 * Replace every snapshot block embedded in a system prompt with the
 * "snapshot outdated" placeholder.
 *
 * @param {string} prompt - System prompt that may embed snapshot blocks.
 * @returns {string} The prompt unchanged when it has no snapshot marker,
 *   otherwise the prompt with each block replaced by `SNAPSHOT_OUTDATED`.
 */
function stripSnapshotFromPrompt(prompt) {
  return containsSnapshot(prompt)
    ? prompt.replace(SNAPSHOT_REGEX, SNAPSHOT_OUTDATED)
    : prompt;
}
|
|
109
|
+
|
|
110
|
+
//#endregion
|
|
111
|
+
//#region src/core/agent-loop/messages.ts
|
|
122
112
|
/**
 * Detect whether the user explicitly asked to operate the agent's own chat UI
 * (chat input, send button, dock, ...).
 *
 * The message must contain BOTH a UI keyword and an action verb, and the verb
 * must be a separate token from the keyword. A verb that is only a fragment
 * of a longer UI noun ("input" in "chatinput", "输入" in "输入框") or that is
 * the very same token as the only keyword ("发送" alone) does not count;
 * otherwise a mere mention of the widget would qualify.
 *
 * English patterns run on the lower-cased message; Chinese patterns run on a
 * compacted copy with whitespace, punctuation and symbols removed.
 *
 * @param {string} userMessage - Raw user instruction.
 * @returns {boolean} `true` when an action verb targets a chat-UI keyword.
 */
function isExplicitAgentUiRequest(userMessage) {
  const lower = userMessage.toLowerCase();
  const compact = lower.replace(/[\s\p{P}\p{S}]+/gu, "");

  // Record where each token was found; `source` keeps offsets from the two
  // differently-indexed strings from being compared with each other.
  const findTokens = (pattern, text, source) =>
    Array.from(text.matchAll(pattern), (match) => ({
      source,
      start: match.index,
      end: match.index + match[0].length
    }));

  // Longer alternatives come first so "chatinput" is not consumed as "chat"
  // (which would leave a dangling "input" that looks like a verb).
  const keywords = [
    ...findTokens(/(chatinput|sendbutton|quicktest|shortcut|chat|dock)/gi, lower, "lower"),
    ...findTokens(/(指令输入框|消息输入框|聊天面板|发送按钮|快捷测试|测试按钮|输入框|聊天|对话|发送)/g, compact, "compact")
  ];
  if (keywords.length === 0) return false;

  const verbs = [
    ...findTokens(/(press|click|type|fill|send|input|submit|enter)/gi, lower, "lower"),
    ...findTokens(/(输入|点击|发送|填写|填入|操作|提交|回车|按下)/g, compact, "compact")
  ];

  const covers = (outer, inner) =>
    outer.source === inner.source && outer.start <= inner.start && inner.end <= outer.end;
  const isSameToken = (a, b) => covers(a, b) && covers(b, a);

  // Drop verbs that are merely a fragment of a longer UI keyword.
  const actionVerbs = verbs.filter(
    (verb) => !keywords.some((keyword) => covers(keyword, verb) && !isSameToken(keyword, verb))
  );

  // An action verb needs a keyword other than itself to act upon.
  return actionVerbs.some((verb) => keywords.some((keyword) => !isSameToken(keyword, verb)));
}
|
|
122
|
+
/**
 * Render a short, human-readable summary of a tool input.
 *
 * Only the keys action, selector, waitMs, waitSeconds, url and text are
 * considered, in that order. String values are JSON-quoted and cut to 80
 * characters; numbers and booleans are printed as-is; every other value
 * (including null and undefined) is skipped.
 *
 * @param {unknown} input - Tool input payload.
 * @returns {string} ` (key=value, ...)` with a leading space, or `""` when
 *   `input` is not an object or none of the keys yields a value.
 */
function formatToolInputBrief(input) {
  if (!input || typeof input !== "object") return "";
  const briefKeys = ["action", "selector", "waitMs", "waitSeconds", "url", "text"];
  const rendered = briefKeys.flatMap((key) => {
    const value = input[key];
    switch (typeof value) {
      case "string":
        return [`${key}=${JSON.stringify(value).slice(0, 80)}`];
      case "number":
      case "boolean":
        return [`${key}=${String(value)}`];
      default:
        return [];
    }
  });
  return rendered.length > 0 ? ` (${rendered.join(", ")})` : "";
}
|
|
143
|
+
/**
|
|
144
|
+
* 结果摘要(中)/ Build one-line summary for tool result (EN).
|
|
125
145
|
*/
|
|
126
146
|
function formatToolResultBrief(result) {
|
|
127
147
|
const firstLine = toContentString(result.content).split("\n").find((l) => l.trim())?.trim().slice(0, 80) ?? "";
|
|
@@ -132,25 +152,36 @@ function formatToolResultBrief(result) {
|
|
|
132
152
|
return `✓ ${firstLine}`;
|
|
133
153
|
}
|
|
134
154
|
/**
|
|
135
|
-
*
|
|
136
|
-
*
|
|
137
|
-
* 核心思路:保留用户原始消息与 system prompt 不变,
|
|
138
|
-
* 只将循环中产出的 assistant(含 toolCalls)+ tool(结果)消息对
|
|
139
|
-
* 压缩为一条 assistant 摘要 + 一条 user 上下文。
|
|
155
|
+
* 构建紧凑消息数组(中)/ Build compact AI message array (EN).
|
|
140
156
|
*
|
|
141
|
-
*
|
|
142
|
-
*
|
|
143
|
-
* - 后续:[...history, { user: 原始消息 }, { assistant: 工具执行摘要 }, { user: 当前状态+快照 }]
|
|
157
|
+
* Round 0: task + snapshot.
|
|
158
|
+
* Round 1+: master goal + done steps + execution context + latest snapshot.
|
|
144
159
|
*
|
|
145
|
-
*
|
|
160
|
+
* 新增渐进式语义(中)/ Progressive semantics (EN):
|
|
161
|
+
* - `remainingInstruction`:当前轮次仍待执行的文本。
|
|
162
|
+
* - `previousRoundTasks`:上一轮已执行的任务数组,避免重复计划。
|
|
163
|
+
* - 消息中要求模型输出 `REMAINING: ...` 或 `REMAINING: DONE`,供下一轮继续消费。
|
|
146
164
|
*/
|
|
147
|
-
function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history) {
|
|
165
|
+
function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks) {
|
|
148
166
|
const messages = history ? [...history] : [];
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
167
|
+
const allowAgentUiInteraction = isExplicitAgentUiRequest(userMessage);
|
|
168
|
+
const activeInstruction = remainingInstruction && remainingInstruction.trim() ? remainingInstruction.trim() : userMessage;
|
|
169
|
+
if (trace.length === 0) {
|
|
170
|
+
const parts = [
|
|
171
|
+
userMessage,
|
|
172
|
+
"",
|
|
173
|
+
"## Progressive execution state",
|
|
174
|
+
"Current remaining instruction to execute this round:",
|
|
175
|
+
activeInstruction
|
|
176
|
+
];
|
|
177
|
+
if (currentUrl) parts.push("", `URL: ${currentUrl}`);
|
|
178
|
+
if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
|
|
179
|
+
messages.push({
|
|
180
|
+
role: "user",
|
|
181
|
+
content: parts.join("\n")
|
|
182
|
+
});
|
|
183
|
+
return messages;
|
|
184
|
+
}
|
|
154
185
|
const traceParts = [];
|
|
155
186
|
for (let i = 0; i < trace.length; i++) {
|
|
156
187
|
const entry = trace[i];
|
|
@@ -158,23 +189,40 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
158
189
|
const brief = formatToolResultBrief(entry.result);
|
|
159
190
|
const status = isError ? "❌" : "✅";
|
|
160
191
|
const marker = entry.marker ? ` ${entry.marker}` : "";
|
|
161
|
-
traceParts.push(`${status}
|
|
192
|
+
traceParts.push(`${status} ${i + 1}. ${entry.name}${formatToolInputBrief(entry.input)} → ${brief}${marker}`);
|
|
162
193
|
}
|
|
163
194
|
messages.push({
|
|
164
195
|
role: "assistant",
|
|
165
|
-
content:
|
|
196
|
+
content: `Done steps (do NOT repeat):\n${traceParts.join("\n")}`
|
|
166
197
|
});
|
|
167
|
-
const
|
|
198
|
+
const hasErrors = trace.some((e) => hasToolError(e.result));
|
|
199
|
+
const contextParts = [
|
|
200
|
+
"## Execution context",
|
|
201
|
+
"Current remaining instruction:",
|
|
202
|
+
activeInstruction,
|
|
203
|
+
"",
|
|
204
|
+
"Task-reduction model:",
|
|
205
|
+
"Input: current remaining instruction + previous round executed actions + this-round actions.",
|
|
206
|
+
"Output: new remaining instruction after removing this-round actions.",
|
|
207
|
+
"Start from visible page state directly. Do NOT restate task. Do NOT output planning text.",
|
|
208
|
+
"Execute all independent visible sub-tasks in one round.",
|
|
209
|
+
"Do NOT act on elements not present in this snapshot yet.",
|
|
210
|
+
"If action changes DOM (open modal/navigate), stop after that batch and continue next round.",
|
|
211
|
+
"Do NOT call page_info (get_url/get_title/query_all/snapshot).",
|
|
212
|
+
"For dropdown/select fields, use dom with action=select_option (or fill on a select).",
|
|
213
|
+
allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content."
|
|
214
|
+
];
|
|
215
|
+
if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
|
|
216
|
+
else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
|
|
217
|
+
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
218
|
+
contextParts.push("", "After this round, include one plain text line:", "REMAINING: <new remaining instruction after this-round actions>", "or REMAINING: DONE");
|
|
168
219
|
const lastEntry = trace[trace.length - 1];
|
|
169
220
|
if (hasToolError(lastEntry.result)) {
|
|
170
221
|
const stripped = toContentString(lastEntry.result.content).replace(SNAPSHOT_REGEX, "").trim();
|
|
171
|
-
if (stripped && stripped.length <
|
|
172
|
-
contextParts.push("", "### 最近失败操作详情", stripped);
|
|
173
|
-
contextParts.push("请换一种方式完成该步骤,或跳过该步骤继续后续操作。");
|
|
174
|
-
}
|
|
222
|
+
if (stripped && stripped.length < 300) contextParts.push("", "Last error: " + stripped);
|
|
175
223
|
}
|
|
176
|
-
if (currentUrl) contextParts.push("",
|
|
177
|
-
if (latestSnapshot) contextParts.push("", "##
|
|
224
|
+
if (currentUrl) contextParts.push("", `URL: ${currentUrl}`);
|
|
225
|
+
if (latestSnapshot) contextParts.push("", "## Latest DOM snapshot", "Use hash IDs from this snapshot. Do NOT call page_info — this is already the latest.", wrapSnapshot(latestSnapshot));
|
|
178
226
|
messages.push({
|
|
179
227
|
role: "user",
|
|
180
228
|
content: contextParts.join("\n")
|
|
@@ -182,39 +230,339 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
182
230
|
return messages;
|
|
183
231
|
}
|
|
184
232
|
|
|
233
|
+
//#endregion
|
|
234
|
+
//#region src/core/agent-loop/recovery.ts
|
|
235
|
+
/** `page_info` actions that are intercepted instead of being dispatched. */
const REDUNDANT_PAGE_INFO_ACTIONS = new Set(["snapshot", "query_all", "get_url", "get_title", "get_viewport"]);

/**
 * Decide whether a tool call is a redundant `page_info` request.
 *
 * @param {string} toolName - Name of the requested tool.
 * @param {unknown} toolInput - Input of the requested tool.
 * @param {unknown} _latestSnapshot - Unused.
 * @param {number} round - Current round, echoed into the result details.
 * @returns {{ content: string, details: object } | null} A synthetic
 *   "skipped" result when the call is a `page_info` action listed in
 *   `REDUNDANT_PAGE_INFO_ACTIONS`; `null` otherwise.
 */
function checkRedundantSnapshot(toolName, toolInput, _latestSnapshot, round) {
  if (toolName !== "page_info") return null;
  const action = getToolAction(toolInput);
  const isBlocked = Boolean(action) && REDUNDANT_PAGE_INFO_ACTIONS.has(action);
  if (!isBlocked) return null;
  const details = {
    code: "REDUNDANT_PAGE_INFO_SKIPPED",
    action,
    round
  };
  return {
    content: `page_info.${action} is blocked in loop execution. A snapshot is provided by the framework; continue with actionable tools directly.`,
    details
  };
}
|
|
259
|
+
/**
 * Debounce back-to-back `page_info` snapshot calls.
 *
 * A snapshot call increments the running counter; any other call resets it
 * to 0. Once the counter reaches 2 the result is replaced by an error-marked
 * copy whose content has a "redundant snapshot" hint appended.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Input of the executed tool.
 * @param {{ content: unknown }} result - Result returned by the tool.
 * @param {number} consecutiveCount - Snapshot calls seen in a row so far.
 * @returns {{ result: object, consecutiveCount: number }} The (possibly
 *   rewritten) result together with the updated counter.
 */
function applySnapshotDebounce(toolName, toolInput, result, consecutiveCount) {
  const isSnapshotCall = toolName === "page_info" && getToolAction(toolInput) === "snapshot";
  if (!isSnapshotCall) {
    return {
      result,
      consecutiveCount: 0
    };
  }

  const newCount = consecutiveCount + 1;
  if (!(newCount >= 2)) {
    return {
      result,
      consecutiveCount: newCount
    };
  }

  const hint = "Redundant snapshot detected. Continue with remaining actionable steps using the latest snapshot; avoid additional snapshot unless navigation or uncertainty changes.";
  const flagged = {
    content: [toContentString(result.content), hint].join("\n"),
    details: {
      error: true,
      code: "REDUNDANT_SNAPSHOT",
      consecutiveSnapshotCalls: newCount
    }
  };
  return {
    consecutiveCount: newCount,
    result: flagged
  };
}
|
|
286
|
+
/**
 * Recover from an element-not-found failure of a `dom` tool call.
 *
 * Attempts are counted per tool-call key in `recoveryAttempts`. While the
 * count is within `DEFAULT_ACTION_RECOVERY_ROUNDS` the function waits,
 * fires `callbacks.onBeforeRecoverySnapshot`, stores a fresh snapshot in
 * `pageContext.latestSnapshot` and returns an error result asking to
 * re-locate the target. Past the limit it returns a "max recovery reached"
 * error result without waiting or refreshing.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Input of the executed tool.
 * @param {{ content: unknown }} result - Result returned by the tool.
 * @param {Map<string, number>} recoveryAttempts - Attempt counters, mutated in place.
 * @param {object} registry - Tool registry used to re-read the snapshot.
 * @param {{ latestSnapshot?: string }} pageContext - Context whose snapshot is refreshed.
 * @param {object} [callbacks] - Optional hooks; only `onBeforeRecoverySnapshot` is used.
 * @returns {Promise<object | null>} Replacement result, or `null` when the
 *   call is not a `dom` call that failed with element-not-found.
 */
async function handleElementRecovery(toolName, toolInput, result, recoveryAttempts, registry, pageContext, callbacks) {
  const isMissingDomTarget = toolName === "dom" && isElementNotFoundResult(result);
  if (!isMissingDomTarget) return null;

  const key = buildToolCallKey(toolName, toolInput);
  const previousAttempts = recoveryAttempts.get(key) ?? 0;
  const attempts = previousAttempts + 1;
  recoveryAttempts.set(key, attempts);
  const recoveryWaitMs = resolveRecoveryWaitMs(toolInput);

  // Both outcomes share the same shape; only the code and trailing note differ.
  const buildResult = (code, note) => ({
    content: [toContentString(result.content), note].join("\n"),
    details: {
      error: true,
      code,
      recoveryAttempt: attempts,
      recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
    }
  });

  if (attempts <= DEFAULT_ACTION_RECOVERY_ROUNDS) {
    await sleep$1(recoveryWaitMs);
    callbacks?.onBeforeRecoverySnapshot?.();
    pageContext.latestSnapshot = await readPageSnapshot(registry);
    return buildResult(
      "ELEMENT_NOT_FOUND_RECOVERY",
      `Recovery ${attempts}/${DEFAULT_ACTION_RECOVERY_ROUNDS}: snapshot refreshed, re-locate target.`
    );
  }

  return buildResult(
    "ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED",
    `Max recovery attempts (${DEFAULT_ACTION_RECOVERY_ROUNDS}) reached. Try a different target.`
  );
}
|
|
322
|
+
/**
 * Refresh the cached snapshot after a successful navigation.
 *
 * Applies only to the `navigate` tool with action goto, back, forward or
 * reload, and only when the result is not flagged as an error. In that case
 * `callbacks.onBeforeRecoverySnapshot` is fired and
 * `pageContext.latestSnapshot` is replaced with a freshly read snapshot.
 *
 * @param {string} toolName - Name of the executed tool.
 * @param {unknown} toolInput - Input of the executed tool.
 * @param {object} result - Result returned by the tool.
 * @param {object} registry - Tool registry used to re-read the snapshot.
 * @param {{ latestSnapshot?: string }} pageContext - Context whose snapshot is refreshed.
 * @param {object} [callbacks] - Optional hooks; only `onBeforeRecoverySnapshot` is used.
 * @returns {Promise<void>}
 */
async function handleNavigationUrlChange(toolName, toolInput, result, registry, pageContext, callbacks) {
  if (toolName !== "navigate") return;
  const action = getToolAction(toolInput);
  const isNavigationAction = ["goto", "back", "forward", "reload"].includes(action);
  if (!isNavigationAction || hasToolError(result)) return;
  callbacks?.onBeforeRecoverySnapshot?.();
  pageContext.latestSnapshot = await readPageSnapshot(registry);
}
|
|
331
|
+
/** Tools that only read page state. */
const READ_ONLY_TOOLS = new Set(["page_info"]);

/**
 * Track rounds that consist solely of read-only tool calls.
 *
 * @param {string[]} toolCallNames - Names of the tools requested this round.
 * @param {number} consecutiveReadOnlyRounds - Read-only rounds seen in a row so far.
 * @returns {number} `0` when at least one tool is not read-only, `-1` when the
 *   updated streak reaches 2 (the loop should terminate), otherwise the
 *   updated streak.
 */
function detectIdleLoop(toolCallNames, consecutiveReadOnlyRounds) {
  const onlyReads = toolCallNames.every((name) => READ_ONLY_TOOLS.has(name));
  if (!onlyReads) return 0;
  const newCount = consecutiveReadOnlyRounds + 1;
  if (newCount >= 2) return -1;
  return newCount;
}
|
|
345
|
+
|
|
185
346
|
//#endregion
|
|
186
347
|
//#region src/core/agent-loop/index.ts
|
|
187
348
|
/**
|
|
188
|
-
*
|
|
349
|
+
* Agent Loop 主流程(中)/ Core environment-agnostic agent loop (EN).
|
|
350
|
+
*
|
|
351
|
+
* 负责消息构建、AI 决策、工具执行、恢复保护与指标汇总。
|
|
352
|
+
* Orchestrates message build, AI decisions, tool execution, recovery, and metrics.
|
|
353
|
+
*
|
|
354
|
+
* 流程图(文本):
|
|
355
|
+
*
|
|
356
|
+
* 轮次开始
|
|
357
|
+
* │
|
|
358
|
+
* ├─ 确保快照可用
|
|
359
|
+
* ├─ 构建紧凑消息(目标 + 剩余任务 + 执行轨迹 + 快照)
|
|
360
|
+
* ├─ 调用模型
|
|
361
|
+
* ├─ 无 toolCalls ? 结束 : 执行工具
|
|
362
|
+
* ├─ 应用保护机制(冗余拦截/恢复/导航检测/空转/防自转)
|
|
363
|
+
* ├─ 刷新快照
|
|
364
|
+
* ▼
|
|
365
|
+
* 下一轮或停机
|
|
366
|
+
*/
|
|
367
|
+
/**
|
|
368
|
+
* 执行 Agent 循环(中)/ Execute the agent loop (EN).
|
|
189
369
|
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
* 2. 循环:发消息给 AI → 检查是否返回 tool_call → 执行 → 反馈 → 继续
|
|
193
|
-
* 3. AI 不再调用工具时,返回最终回复
|
|
370
|
+
* 每轮:确保快照 → 构建消息 → 调用 AI → 执行工具 → 保护处理 → 刷新快照。
|
|
371
|
+
* Per round: ensure snapshot -> build messages -> call AI -> execute tools -> apply protections -> refresh snapshot.
|
|
194
372
|
*/
|
|
195
373
|
async function executeAgentLoop(params) {
|
|
196
|
-
const { client, registry, systemPrompt, message, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
|
|
374
|
+
const { client, registry, systemPrompt, message, initialSnapshot, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
|
|
197
375
|
const tools = registry.getDefinitions();
|
|
198
376
|
const allToolCalls = [];
|
|
199
377
|
const fullToolTrace = [];
|
|
200
378
|
const actionRecoveryAttempts = /* @__PURE__ */ new Map();
|
|
201
|
-
const pageContext = {};
|
|
379
|
+
const pageContext = { latestSnapshot: initialSnapshot };
|
|
202
380
|
let finalReply = "";
|
|
381
|
+
let consecutiveSnapshotCalls = 0;
|
|
382
|
+
let consecutiveReadOnlyRounds = 0;
|
|
383
|
+
let usedRounds = 0;
|
|
384
|
+
let inputTokens = 0;
|
|
385
|
+
let outputTokens = 0;
|
|
386
|
+
let remainingInstruction = message.trim();
|
|
387
|
+
let previousRoundTasks = [];
|
|
388
|
+
let lastPlannedBatchKey = "";
|
|
389
|
+
let consecutiveSamePlannedBatch = 0;
|
|
390
|
+
let lastRoundHadError = false;
|
|
391
|
+
let recoveryCount = 0;
|
|
392
|
+
let redundantInterceptCount = 0;
|
|
393
|
+
let pendingNotFoundRetry;
|
|
394
|
+
let snapshotReadCount = 0;
|
|
395
|
+
let snapshotSizeTotal = 0;
|
|
396
|
+
let snapshotSizeMax = 0;
|
|
397
|
+
/**
|
|
398
|
+
* 记录快照统计(中)/ Record snapshot metrics (EN).
|
|
399
|
+
*
|
|
400
|
+
* 用于输出可观测指标:读取次数、平均长度、最大长度。
|
|
401
|
+
* Used for observability metrics: read count, avg size, max size.
|
|
402
|
+
*/
|
|
403
|
+
const recordSnapshotStats = (snapshot) => {
|
|
404
|
+
if (typeof snapshot !== "string") return;
|
|
405
|
+
snapshotReadCount += 1;
|
|
406
|
+
snapshotSizeTotal += snapshot.length;
|
|
407
|
+
if (snapshot.length > snapshotSizeMax) snapshotSizeMax = snapshot.length;
|
|
408
|
+
};
|
|
409
|
+
/**
|
|
410
|
+
* 刷新页面快照(中)/ Refresh page snapshot (EN).
|
|
411
|
+
*
|
|
412
|
+
* 只做两件事:读取最新快照 + 更新快照统计。
|
|
413
|
+
* Does exactly two things: read latest snapshot + update metrics.
|
|
414
|
+
*/
|
|
415
|
+
const refreshSnapshot = async () => {
|
|
416
|
+
pageContext.latestSnapshot = await readPageSnapshot(registry);
|
|
417
|
+
recordSnapshotStats(pageContext.latestSnapshot);
|
|
418
|
+
};
|
|
419
|
+
if (pageContext.latestSnapshot) recordSnapshotStats(pageContext.latestSnapshot);
|
|
420
|
+
/**
|
|
421
|
+
* 追加工具轨迹(中)/ Append tool trace entry (EN).
|
|
422
|
+
*
|
|
423
|
+
* 同时写入:
|
|
424
|
+
* - allToolCalls:对外返回结果
|
|
425
|
+
* - fullToolTrace:下一轮消息上下文
|
|
426
|
+
*/
|
|
427
|
+
const appendToolTrace = (round, name, input, result) => {
|
|
428
|
+
allToolCalls.push({
|
|
429
|
+
name,
|
|
430
|
+
input,
|
|
431
|
+
result
|
|
432
|
+
});
|
|
433
|
+
fullToolTrace.push({
|
|
434
|
+
round,
|
|
435
|
+
name,
|
|
436
|
+
input,
|
|
437
|
+
result
|
|
438
|
+
});
|
|
439
|
+
};
|
|
440
|
+
/**
|
|
441
|
+
* 生成任务数组(中)/ Build normalized task array (EN).
|
|
442
|
+
*
|
|
443
|
+
* 将本轮 toolCalls 归一化成稳定字符串数组,便于:
|
|
444
|
+
* - 回传到下一轮消息上下文(提醒已执行计划)
|
|
445
|
+
* - 进行“是否与上一轮完全相同”的比较
|
|
446
|
+
*/
|
|
447
|
+
const buildTaskArray = (toolCalls) => toolCalls.map((tc) => {
|
|
448
|
+
const inputText = JSON.stringify(tc.input);
|
|
449
|
+
return `${tc.name}:${inputText}`;
|
|
450
|
+
});
|
|
451
|
+
/**
|
|
452
|
+
* 判定动作是否会触发 DOM 结构变化(中)/ Whether action may cause DOM-shape change (EN).
|
|
453
|
+
*
|
|
454
|
+
* 触发后应强制断轮,等待下一轮新快照继续。
|
|
455
|
+
* Force round break after such action and continue with refreshed snapshot next round.
|
|
456
|
+
*/
|
|
457
|
+
const shouldForceRoundBreak = (toolName, toolInput) => {
|
|
458
|
+
const action = getToolAction(toolInput);
|
|
459
|
+
if (toolName === "navigate") return action === "goto" || action === "back" || action === "forward" || action === "reload";
|
|
460
|
+
if (toolName === "dom") return action === "click" || action === "press";
|
|
461
|
+
if (toolName === "evaluate") return true;
|
|
462
|
+
return false;
|
|
463
|
+
};
|
|
464
|
+
/**
|
|
465
|
+
* 将“找不到元素”的失败任务整理成可重试清单(中)/ Build retry task list for not-found failures (EN).
|
|
466
|
+
*/
|
|
467
|
+
const collectMissingTask = (name, input, result) => {
|
|
468
|
+
if (!isElementNotFoundResult(result)) return null;
|
|
469
|
+
return {
|
|
470
|
+
name,
|
|
471
|
+
input,
|
|
472
|
+
reason: toContentString(result.content).slice(0, 240)
|
|
473
|
+
};
|
|
474
|
+
};
|
|
475
|
+
/**
|
|
476
|
+
* 解析 REMAINING 协议(中)/ Parse REMAINING protocol from model text (EN).
|
|
477
|
+
*
|
|
478
|
+
* 支持:
|
|
479
|
+
* - `REMAINING: <text>` → 继续下一轮消费该剩余文本
|
|
480
|
+
* - `REMAINING: DONE` → 剩余任务为空
|
|
481
|
+
* 返回 null 表示本轮没有提供 REMAINING 标记。
|
|
482
|
+
*/
|
|
483
|
+
const parseRemainingInstruction = (text) => {
|
|
484
|
+
if (!text) return null;
|
|
485
|
+
const match = text.match(/REMAINING\s*:\s*([\s\S]*)$/i);
|
|
486
|
+
if (!match) return null;
|
|
487
|
+
const value = match[1].trim();
|
|
488
|
+
return /^done$/i.test(value) ? "" : value;
|
|
489
|
+
};
|
|
490
|
+
/**
|
|
491
|
+
* 推进下一轮描述(中)/ Derive next-round instruction from model text (EN).
|
|
492
|
+
*
|
|
493
|
+
* 优先 REMAINING 协议;若未提供,则把本轮 content 视为“更新后的任务描述”。
|
|
494
|
+
* Priority: REMAINING protocol first; otherwise treat current content as updated instruction.
|
|
495
|
+
*/
|
|
496
|
+
const deriveNextInstruction = (text, currentInstruction) => {
|
|
497
|
+
const parsed = parseRemainingInstruction(text);
|
|
498
|
+
if (parsed !== null) return {
|
|
499
|
+
nextInstruction: parsed,
|
|
500
|
+
hasRemainingProtocol: true
|
|
501
|
+
};
|
|
502
|
+
return {
|
|
503
|
+
nextInstruction: currentInstruction,
|
|
504
|
+
hasRemainingProtocol: false
|
|
505
|
+
};
|
|
506
|
+
};
|
|
203
507
|
for (let round = 0; round < maxRounds; round++) {
|
|
204
508
|
callbacks?.onRound?.(round);
|
|
205
|
-
|
|
206
|
-
|
|
509
|
+
usedRounds = round + 1;
|
|
510
|
+
if (!pageContext.latestSnapshot) await refreshSnapshot();
|
|
511
|
+
const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
|
|
512
|
+
const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks);
|
|
513
|
+
if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
|
|
514
|
+
role: "user",
|
|
515
|
+
content: [
|
|
516
|
+
"## Not-found retry context",
|
|
517
|
+
`Retry attempt: ${pendingNotFoundRetry.attempt}/${DEFAULT_NOT_FOUND_RETRY_ROUNDS}`,
|
|
518
|
+
"These tool targets were not found in previous execution:",
|
|
519
|
+
...pendingNotFoundRetry.tasks.map((task, i) => `${i + 1}. ${task.name}(${JSON.stringify(task.input)}) -> ${task.reason}`),
|
|
520
|
+
"Only retry unresolved targets that are now visible in the latest snapshot.",
|
|
521
|
+
"If still not found, return no tool calls and include REMAINING with the unresolved part."
|
|
522
|
+
].join("\n")
|
|
523
|
+
});
|
|
207
524
|
const response = await client.chat({
|
|
208
525
|
systemPrompt: effectivePrompt,
|
|
209
526
|
messages: chatMessages,
|
|
210
527
|
tools
|
|
211
528
|
});
|
|
529
|
+
inputTokens += response.usage?.inputTokens ?? 0;
|
|
530
|
+
outputTokens += response.usage?.outputTokens ?? 0;
|
|
531
|
+
const nextInstructionState = deriveNextInstruction(response.text, remainingInstruction);
|
|
532
|
+
remainingInstruction = nextInstructionState.nextInstruction;
|
|
212
533
|
if (!response.toolCalls || response.toolCalls.length === 0) {
|
|
534
|
+
if (pendingNotFoundRetry) {
|
|
535
|
+
const unresolvedHint = response.text?.toLowerCase() ?? "";
|
|
536
|
+
if ((unresolvedHint.includes("找不到") || unresolvedHint.includes("未找到") || unresolvedHint.includes("not found") || unresolvedHint.includes("cannot find") || unresolvedHint.includes("unable to locate")) && pendingNotFoundRetry.attempt < DEFAULT_NOT_FOUND_RETRY_ROUNDS) {
|
|
537
|
+
pendingNotFoundRetry = {
|
|
538
|
+
...pendingNotFoundRetry,
|
|
539
|
+
attempt: pendingNotFoundRetry.attempt + 1
|
|
540
|
+
};
|
|
541
|
+
callbacks?.onText?.(`未命中目标,准备第 ${pendingNotFoundRetry.attempt} 次重试(等待 ${DEFAULT_NOT_FOUND_RETRY_WAIT_MS}ms)...`);
|
|
542
|
+
await sleep$1(DEFAULT_NOT_FOUND_RETRY_WAIT_MS);
|
|
543
|
+
await refreshSnapshot();
|
|
544
|
+
continue;
|
|
545
|
+
}
|
|
546
|
+
pendingNotFoundRetry = void 0;
|
|
547
|
+
}
|
|
213
548
|
finalReply = response.text ?? "";
|
|
214
549
|
if (finalReply) callbacks?.onText?.(finalReply);
|
|
215
550
|
break;
|
|
216
551
|
}
|
|
217
|
-
|
|
552
|
+
const plannedBatchKey = JSON.stringify(response.toolCalls.map((tc) => ({
|
|
553
|
+
name: tc.name,
|
|
554
|
+
input: tc.input
|
|
555
|
+
})));
|
|
556
|
+
if (plannedBatchKey === lastPlannedBatchKey) consecutiveSamePlannedBatch += 1;
|
|
557
|
+
else {
|
|
558
|
+
consecutiveSamePlannedBatch = 1;
|
|
559
|
+
lastPlannedBatchKey = plannedBatchKey;
|
|
560
|
+
}
|
|
561
|
+
if (consecutiveSamePlannedBatch >= 2 && !lastRoundHadError) {
|
|
562
|
+
finalReply = response.text?.trim() || "任务已完成。";
|
|
563
|
+
if (finalReply) callbacks?.onText?.(finalReply);
|
|
564
|
+
break;
|
|
565
|
+
}
|
|
218
566
|
if (dryRun) {
|
|
219
567
|
finalReply = response.text ? response.text + "\n\n" : "";
|
|
220
568
|
finalReply += "🔧 AI 请求调用以下工具(dry-run 模式,未执行):\n";
|
|
@@ -229,103 +577,57 @@ async function executeAgentLoop(params) {
|
|
|
229
577
|
}
|
|
230
578
|
break;
|
|
231
579
|
}
|
|
580
|
+
let roundHasError = false;
|
|
581
|
+
const executedTaskCalls = [];
|
|
582
|
+
const roundMissingTasks = [];
|
|
232
583
|
for (const tc of response.toolCalls) {
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
callbacks?.onBeforeRecoverySnapshot?.(latestUrl);
|
|
240
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
241
|
-
if (tc.name === "dom") {
|
|
242
|
-
const result = {
|
|
243
|
-
content: `URL 已变更为 ${latestUrl},请基于最新快照重新定位目标元素。`,
|
|
244
|
-
details: {
|
|
245
|
-
error: true,
|
|
246
|
-
code: "URL_CHANGED_REQUIRE_NEW_SNAPSHOT",
|
|
247
|
-
url: latestUrl
|
|
248
|
-
}
|
|
249
|
-
};
|
|
250
|
-
allToolCalls.push({
|
|
251
|
-
name: tc.name,
|
|
252
|
-
input: tc.input,
|
|
253
|
-
result
|
|
254
|
-
});
|
|
255
|
-
fullToolTrace.push({
|
|
256
|
-
round,
|
|
257
|
-
name: tc.name,
|
|
258
|
-
input: tc.input,
|
|
259
|
-
result,
|
|
260
|
-
marker: "[URL变化待重定位]"
|
|
261
|
-
});
|
|
262
|
-
callbacks?.onToolResult?.(tc.name, result);
|
|
263
|
-
continue;
|
|
264
|
-
}
|
|
265
|
-
}
|
|
584
|
+
const redundant = checkRedundantSnapshot(tc.name, tc.input, pageContext.latestSnapshot, round);
|
|
585
|
+
if (redundant) {
|
|
586
|
+
appendToolTrace(round, tc.name, tc.input, redundant);
|
|
587
|
+
redundantInterceptCount += 1;
|
|
588
|
+
callbacks?.onToolResult?.(tc.name, redundant);
|
|
589
|
+
continue;
|
|
266
590
|
}
|
|
591
|
+
callbacks?.onToolCall?.(tc.name, tc.input);
|
|
267
592
|
let result = await registry.dispatch(tc.name, tc.input);
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
277
|
-
result = {
|
|
278
|
-
content: [
|
|
279
|
-
toContentString(result.content),
|
|
280
|
-
"",
|
|
281
|
-
`自动恢复 ${attempts}/${DEFAULT_ACTION_RECOVERY_ROUNDS}:已刷新快照,请重新定位目标元素。`
|
|
282
|
-
].join("\n"),
|
|
283
|
-
details: {
|
|
284
|
-
error: true,
|
|
285
|
-
code: "ELEMENT_NOT_FOUND_RECOVERY",
|
|
286
|
-
recoveryAttempt: attempts,
|
|
287
|
-
recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
|
|
288
|
-
}
|
|
289
|
-
};
|
|
290
|
-
} else result = {
|
|
291
|
-
content: [
|
|
292
|
-
toContentString(result.content),
|
|
293
|
-
"",
|
|
294
|
-
`已达到最大自动恢复次数(${DEFAULT_ACTION_RECOVERY_ROUNDS})。请调整操作目标后重试。`
|
|
295
|
-
].join("\n"),
|
|
296
|
-
details: {
|
|
297
|
-
error: true,
|
|
298
|
-
code: "ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED",
|
|
299
|
-
recoveryAttempt: attempts,
|
|
300
|
-
recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS
|
|
301
|
-
}
|
|
302
|
-
};
|
|
303
|
-
}
|
|
304
|
-
allToolCalls.push({
|
|
305
|
-
name: tc.name,
|
|
306
|
-
input: tc.input,
|
|
307
|
-
result
|
|
308
|
-
});
|
|
309
|
-
fullToolTrace.push({
|
|
310
|
-
round,
|
|
593
|
+
const debounced = applySnapshotDebounce(tc.name, tc.input, result, consecutiveSnapshotCalls);
|
|
594
|
+
result = debounced.result;
|
|
595
|
+
consecutiveSnapshotCalls = debounced.consecutiveCount;
|
|
596
|
+
const recovered = await handleElementRecovery(tc.name, tc.input, result, actionRecoveryAttempts, registry, pageContext, callbacks);
|
|
597
|
+
if (recovered) result = recovered;
|
|
598
|
+
if (recovered?.details && typeof recovered.details === "object" && recovered.details.code === "ELEMENT_NOT_FOUND_RECOVERY") recoveryCount += 1;
|
|
599
|
+
appendToolTrace(round, tc.name, tc.input, result);
|
|
600
|
+
executedTaskCalls.push({
|
|
311
601
|
name: tc.name,
|
|
312
|
-
input: tc.input
|
|
313
|
-
result
|
|
602
|
+
input: tc.input
|
|
314
603
|
});
|
|
315
|
-
|
|
316
|
-
if (
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
pageContext.currentUrl = newUrl;
|
|
322
|
-
callbacks?.onBeforeRecoverySnapshot?.(newUrl);
|
|
323
|
-
pageContext.latestSnapshot = await readPageSnapshot(registry, 8);
|
|
324
|
-
}
|
|
325
|
-
}
|
|
604
|
+
const missingTask = collectMissingTask(tc.name, tc.input, result);
|
|
605
|
+
if (missingTask) roundMissingTasks.push(missingTask);
|
|
606
|
+
if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
|
|
607
|
+
if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
|
|
608
|
+
pageContext.latestSnapshot = toContentString(result.content);
|
|
609
|
+
recordSnapshotStats(pageContext.latestSnapshot);
|
|
326
610
|
}
|
|
611
|
+
await handleNavigationUrlChange(tc.name, tc.input, result, registry, pageContext, callbacks);
|
|
327
612
|
callbacks?.onToolResult?.(tc.name, result);
|
|
613
|
+
if (shouldForceRoundBreak(tc.name, tc.input)) break;
|
|
328
614
|
}
|
|
615
|
+
if (roundMissingTasks.length > 0) pendingNotFoundRetry = {
|
|
616
|
+
attempt: 1,
|
|
617
|
+
tasks: roundMissingTasks
|
|
618
|
+
};
|
|
619
|
+
else pendingNotFoundRetry = void 0;
|
|
620
|
+
if (!nextInstructionState.hasRemainingProtocol) roundHasError = true;
|
|
621
|
+
lastRoundHadError = roundHasError;
|
|
622
|
+
previousRoundTasks = buildTaskArray(executedTaskCalls);
|
|
623
|
+
const idleResult = detectIdleLoop(executedTaskCalls.map((tc) => tc.name), consecutiveReadOnlyRounds);
|
|
624
|
+
if (idleResult === -1) {
|
|
625
|
+
finalReply = response.text || "任务已完成。";
|
|
626
|
+
if (finalReply) callbacks?.onText?.(finalReply);
|
|
627
|
+
break;
|
|
628
|
+
}
|
|
629
|
+
consecutiveReadOnlyRounds = idleResult;
|
|
630
|
+
await refreshSnapshot();
|
|
329
631
|
}
|
|
330
632
|
const resultMessages = [...history ?? [], {
|
|
331
633
|
role: "user",
|
|
@@ -335,70 +637,146 @@ async function executeAgentLoop(params) {
|
|
|
335
637
|
role: "assistant",
|
|
336
638
|
content: finalReply
|
|
337
639
|
});
|
|
640
|
+
const successfulToolCalls = allToolCalls.filter((tc) => {
|
|
641
|
+
const details = tc.result.details;
|
|
642
|
+
return !(details && typeof details === "object" && Boolean(details.error));
|
|
643
|
+
}).length;
|
|
644
|
+
const failedToolCalls = allToolCalls.length - successfulToolCalls;
|
|
645
|
+
const metrics = {
|
|
646
|
+
roundCount: usedRounds,
|
|
647
|
+
totalToolCalls: allToolCalls.length,
|
|
648
|
+
successfulToolCalls,
|
|
649
|
+
failedToolCalls,
|
|
650
|
+
toolSuccessRate: allToolCalls.length > 0 ? Number((successfulToolCalls / allToolCalls.length).toFixed(4)) : 1,
|
|
651
|
+
recoveryCount,
|
|
652
|
+
redundantInterceptCount,
|
|
653
|
+
snapshotReadCount,
|
|
654
|
+
latestSnapshotSize: pageContext.latestSnapshot?.length ?? 0,
|
|
655
|
+
avgSnapshotSize: snapshotReadCount > 0 ? Math.round(snapshotSizeTotal / snapshotReadCount) : 0,
|
|
656
|
+
maxSnapshotSize: snapshotSizeMax,
|
|
657
|
+
inputTokens,
|
|
658
|
+
outputTokens
|
|
659
|
+
};
|
|
660
|
+
callbacks?.onMetrics?.(metrics);
|
|
338
661
|
return {
|
|
339
662
|
reply: finalReply,
|
|
340
663
|
toolCalls: allToolCalls,
|
|
341
|
-
messages: resultMessages
|
|
664
|
+
messages: resultMessages,
|
|
665
|
+
metrics
|
|
342
666
|
};
|
|
343
667
|
}
|
|
344
668
|
|
|
345
669
|
//#endregion
|
|
346
670
|
//#region src/core/ai-client/constants.ts
|
|
347
|
-
/**
|
|
348
|
-
* 各 Provider 的默认 API 端点。
|
|
349
|
-
*
|
|
350
|
-
* - openai → OpenAI 官方 API
|
|
351
|
-
* - copilot → GitHub Models API(使用 OpenAI 兼容格式)
|
|
352
|
-
* - anthropic → Anthropic Messages API
|
|
353
|
-
*/
|
|
671
|
+
/**
 * Default API base URL for each supported provider.
 *
 * The keys double as the list of supported provider names (see
 * validateProvider), and the values are the fallback used by resolveBaseURL
 * when the caller does not supply its own `baseURL`.
 */
const PROVIDER_ENDPOINTS = {
	openai: "https://api.openai.com/v1",
	copilot: "https://models.inference.ai.azure.com",
	anthropic: "https://api.anthropic.com",
	deepseek: "https://api.deepseek.com"
};
|
|
360
|
-
/**
|
|
361
|
-
* 校验 provider 是否受支持。
|
|
362
|
-
*
|
|
363
|
-
* @throws 不支持的 provider 抛出 Error,附带支持列表
|
|
364
|
-
*/
|
|
678
|
+
/**
 * Validate that `provider` is one of the supported AI providers.
 *
 * @param {string} provider - Provider key to look up in PROVIDER_ENDPOINTS.
 * @returns {void}
 * @throws {Error} When the provider is not configured; the message lists the
 *   supported provider names.
 */
function validateProvider(provider) {
	// Own-property lookup: a plain `PROVIDER_ENDPOINTS[provider]` would also be
	// truthy for inherited names such as "constructor" or "toString".
	const isConfigured = Object.prototype.hasOwnProperty.call(PROVIDER_ENDPOINTS, provider) && Boolean(PROVIDER_ENDPOINTS[provider]);
	if (isConfigured) return;
	const supported = Object.keys(PROVIDER_ENDPOINTS).join(", ");
	throw new Error(`Unknown AI provider: ${provider}. Supported: ${supported}`);
}
|
|
371
|
-
/**
|
|
372
|
-
* 解析 provider 对应的 API 基础 URL。
|
|
373
|
-
*
|
|
374
|
-
* 优先使用用户自定义的 baseURL(如本地 Ollama),
|
|
375
|
-
* 其次使用 PROVIDER_ENDPOINTS 中的默认值。
|
|
376
|
-
*/
|
|
685
|
+
/**
 * Resolve the API base URL for a client configuration.
 *
 * @param config - Client config; `baseURL` and `provider` are read.
 * @returns The caller-supplied `baseURL` when it is not null/undefined,
 *   otherwise the provider's entry in PROVIDER_ENDPOINTS, otherwise "".
 */
function resolveBaseURL(config) {
	const customURL = config.baseURL;
	if (customURL != null) return customURL;
	const providerDefault = PROVIDER_ENDPOINTS[config.provider];
	return providerDefault == null ? "" : providerDefault;
}
|
|
380
689
|
/**
 * Strip everything from a schema object that does not survive JSON
 * serialization (symbol-keyed properties, functions, `undefined` values) by
 * round-tripping it through JSON.
 *
 * @param schema - Schema object to clean.
 * @returns A fresh, JSON-only deep copy of `schema`.
 */
function cleanSchema(schema) {
	const serialized = JSON.stringify(schema);
	return JSON.parse(serialized);
}
|
|
392
695
|
|
|
393
696
|
//#endregion
|
|
394
|
-
//#region src/core/ai-client/
|
|
697
|
+
//#region src/core/ai-client/sse.ts
|
|
395
698
|
/**
 * Generic Server-Sent Events consumer for streams whose `data:` payloads are JSON.
 *
 * Reads `response.body` chunk by chunk, reassembles SSE frames (a blank line
 * terminates a frame), parses the joined `data:` lines of each frame as JSON
 * and hands the result to `onEvent`.
 *
 * @param {Response} response - Fetch response. Resolves immediately when it has no body.
 * @param {(payload: unknown, meta: { event: string | undefined, rawData: string }) => unknown} onEvent
 *   Called once per frame (may be async). Returning or resolving to `false`
 *   stops consumption of the whole stream.
 * @param {{ stopOnDone?: boolean, readTimeoutMs?: number }} [options]
 *   `stopOnDone` (default `true`) treats a `[DONE]` payload as end of stream.
 *   `readTimeoutMs` rejects when a single read takes longer; falsy or <= 0 disables it.
 * @returns {Promise<void>}
 * @throws {Error} On read timeout or when reading the underlying stream fails.
 */
async function consumeSSEJSON(response, onEvent, options = {}) {
	if (!response.body) return;
	const reader = response.body.getReader();
	const decoder = new TextDecoder();
	const stopOnDone = options.stopOnDone ?? true;
	const readTimeoutMs = options.readTimeoutMs;
	let buffer = "";
	let currentEvent;
	let dataLines = [];
	// Set when `[DONE]` is seen or the handler asks to stop.
	let stopped = false;
	// True only when the stream was read to its natural end.
	let drained = false;

	/** Read the next chunk, racing it against the optional per-read timeout. */
	function readChunk() {
		if (!readTimeoutMs || readTimeoutMs <= 0) return reader.read();
		return new Promise((resolve, reject) => {
			const timer = setTimeout(() => {
				reject(new Error(`SSE read timeout (${readTimeoutMs}ms)`));
			}, readTimeoutMs);
			reader.read().then((value) => {
				clearTimeout(timer);
				resolve(value);
			}, (error) => {
				clearTimeout(timer);
				reject(error);
			});
		});
	}

	/** Dispatch the frame accumulated so far (if any) and reset frame state. */
	async function flushEvent() {
		const rawData = dataLines.join("\n").trim();
		const event = currentEvent;
		dataLines = [];
		currentEvent = void 0;
		if (!rawData) return;
		if (stopOnDone && rawData === "[DONE]") {
			stopped = true;
			return;
		}
		try {
			if (await onEvent(JSON.parse(rawData), {
				event,
				rawData
			}) === false) stopped = true;
		} catch {
			// Best-effort by design: a malformed JSON frame or a throwing
			// handler skips this frame instead of aborting the stream.
		}
	}

	/** Interpret a single SSE line (the trailing "\r" of CRLF is trimmed away). */
	async function processLine(rawLine) {
		const line = rawLine.trim();
		if (!line) {
			await flushEvent();
			return;
		}
		if (line.startsWith(":")) return;
		if (line.startsWith("event:")) {
			currentEvent = line.slice(6).trim() || void 0;
			return;
		}
		if (line.startsWith("data:")) dataLines.push(line.slice(5).trimStart());
	}

	try {
		while (!stopped) {
			const { done, value } = await readChunk();
			if (done) break;
			buffer += decoder.decode(value, { stream: true });
			const lines = buffer.split("\n");
			buffer = lines.pop() ?? "";
			for (const rawLine of lines) {
				await processLine(rawLine);
				if (stopped) break;
			}
		}
		if (!stopped) {
			// The stream may end without a trailing newline: the last line is
			// still sitting in `buffer` and must be processed before the final flush.
			const tail = buffer + decoder.decode();
			buffer = "";
			if (tail) await processLine(tail);
			if (!stopped) await flushEvent();
		}
		drained = !stopped;
	} finally {
		// Early stop, timeout or read error: release the underlying stream.
		if (!drained) await reader.cancel().catch(() => void 0);
	}
}
|
|
775
|
+
|
|
776
|
+
//#endregion
|
|
777
|
+
//#region src/core/ai-client/custom.ts
|
|
778
|
+
/**
|
|
779
|
+
* BaseAIClient 实现(中)/ BaseAIClient implementation of AIClient (EN).
|
|
402
780
|
*/
|
|
403
781
|
var BaseAIClient = class {
|
|
404
782
|
/** 用户提供的对话处理函数 */
|
|
@@ -407,47 +785,21 @@ var BaseAIClient = class {
|
|
|
407
785
|
this.chatHandler = options.chatHandler;
|
|
408
786
|
}
|
|
409
787
|
/**
 * Send a chat request by delegating to the `chatHandler` stored on this instance.
 *
 * @param params - Chat parameters, forwarded to the handler unchanged.
 * @returns Whatever the handler returns.
 */
async chat(params) {
	return this.chatHandler(params);
}
|
|
793
|
+
/**
 * Reusable SSE (JSON) consumer entry point for subclasses.
 *
 * Forwards all three arguments unchanged to the module-level consumeSSEJSON
 * function and returns its result.
 */
async consumeSSEJSON(response, onEvent, options) {
	return consumeSSEJSON(response, onEvent, options);
}
|
|
421
797
|
};
|
|
422
798
|
|
|
423
799
|
//#endregion
|
|
424
800
|
//#region src/core/ai-client/openai.ts
|
|
425
801
|
/**
|
|
426
|
-
*
|
|
427
|
-
*
|
|
428
|
-
* 封装完整的 OpenAI Chat Completions API 调用流程:
|
|
429
|
-
* 1. buildOpenAIRequest() → 构建 HTTP 请求
|
|
430
|
-
* 2. fetch() → 发送请求
|
|
431
|
-
* 3. parseOpenAIResponse() → 解析响应为统一格式
|
|
432
|
-
*
|
|
433
|
-
* 使用示例:
|
|
434
|
-
* ```ts
|
|
435
|
-
* const client = new OpenAIClient({
|
|
436
|
-
* provider: "openai",
|
|
437
|
-
* model: "gpt-4o",
|
|
438
|
-
* apiKey: "sk-xxx",
|
|
439
|
-
* });
|
|
440
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
441
|
-
* ```
|
|
442
|
-
*
|
|
443
|
-
* 也可用于 Copilot(GitHub Models):
|
|
444
|
-
* ```ts
|
|
445
|
-
* const client = new OpenAIClient({
|
|
446
|
-
* provider: "copilot",
|
|
447
|
-
* model: "gpt-4o",
|
|
448
|
-
* apiKey: "ghp_xxx",
|
|
449
|
-
* });
|
|
450
|
-
* ```
|
|
802
|
+
* OpenAIClient 类(中)/ OpenAIClient class for OpenAI & Copilot (EN).
|
|
451
803
|
*/
|
|
452
804
|
var OpenAIClient = class extends BaseAIClient {
|
|
453
805
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
@@ -455,30 +807,35 @@ var OpenAIClient = class extends BaseAIClient {
|
|
|
455
807
|
constructor(config) {
|
|
456
808
|
super({ chatHandler: async (params) => {
|
|
457
809
|
const req = buildOpenAIRequest(this.config, params);
|
|
458
|
-
|
|
810
|
+
if (!(this.config.stream ?? true)) {
|
|
811
|
+
const res = await fetch(req.url, {
|
|
812
|
+
method: req.method,
|
|
813
|
+
headers: req.headers,
|
|
814
|
+
body: req.body
|
|
815
|
+
});
|
|
816
|
+
if (!res.ok) {
|
|
817
|
+
const errText = await res.text();
|
|
818
|
+
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
819
|
+
}
|
|
820
|
+
return parseOpenAIResponse(await res.json());
|
|
821
|
+
}
|
|
822
|
+
const streamRes = await fetch(req.url, {
|
|
459
823
|
method: req.method,
|
|
460
824
|
headers: req.headers,
|
|
461
825
|
body: req.body
|
|
462
826
|
});
|
|
463
|
-
if (!
|
|
464
|
-
const errText = await
|
|
465
|
-
throw new Error(`AI API ${
|
|
827
|
+
if (!streamRes.ok) {
|
|
828
|
+
const errText = await streamRes.text();
|
|
829
|
+
throw new Error(`AI API ${streamRes.status}: ${errText.slice(0, 500)}`);
|
|
466
830
|
}
|
|
467
|
-
return parseOpenAIResponse(await
|
|
831
|
+
if ((streamRes.headers.get("content-type") ?? "").includes("application/json")) return parseOpenAIResponse(await streamRes.json());
|
|
832
|
+
return parseOpenAIStream(streamRes, 2e4);
|
|
468
833
|
} });
|
|
469
834
|
this.config = config;
|
|
470
835
|
}
|
|
471
836
|
};
|
|
472
837
|
/**
|
|
473
|
-
*
|
|
474
|
-
*
|
|
475
|
-
* 转换逻辑:
|
|
476
|
-
* - system prompt → `{ role: "system", content }` 消息
|
|
477
|
-
* - 工具定义 → `tools` 数组(function calling 格式)
|
|
478
|
-
* - 工具结果 → 拆分为多条 `{ role: "tool", tool_call_id }` 消息
|
|
479
|
-
* - AI 回复含工具调用 → `tool_calls` 字段
|
|
480
|
-
*
|
|
481
|
-
* 默认参数:temperature=0.3, max_tokens=8192, tool_choice="auto"
|
|
838
|
+
* 构建 OpenAI 请求(中)/ Build OpenAI chat request payload (EN).
|
|
482
839
|
*/
|
|
483
840
|
function buildOpenAIRequest(config, params) {
|
|
484
841
|
const baseURL = resolveBaseURL(config);
|
|
@@ -496,11 +853,16 @@ function buildOpenAIRequest(config, params) {
|
|
|
496
853
|
model: config.model,
|
|
497
854
|
messages: openaiMessages,
|
|
498
855
|
temperature: .3,
|
|
499
|
-
max_tokens:
|
|
856
|
+
max_tokens: 4096
|
|
500
857
|
};
|
|
858
|
+
if (config.stream ?? true) {
|
|
859
|
+
body.stream = true;
|
|
860
|
+
body.stream_options = { include_usage: true };
|
|
861
|
+
}
|
|
501
862
|
if (openaiTools && openaiTools.length > 0) {
|
|
502
863
|
body.tools = openaiTools;
|
|
503
864
|
body.tool_choice = "auto";
|
|
865
|
+
body.parallel_tool_calls = true;
|
|
504
866
|
}
|
|
505
867
|
return {
|
|
506
868
|
url: `${baseURL}/chat/completions`,
|
|
@@ -513,14 +875,7 @@ function buildOpenAIRequest(config, params) {
|
|
|
513
875
|
};
|
|
514
876
|
}
|
|
515
877
|
/**
|
|
516
|
-
*
|
|
517
|
-
*
|
|
518
|
-
* 解析要点:
|
|
519
|
-
* - 文本回复 → `choice.message.content`
|
|
520
|
-
* - 工具调用 → `choice.message.tool_calls`,arguments 为 JSON 字符串需 parse
|
|
521
|
-
* - Token 用量 → `usage.prompt_tokens` / `usage.completion_tokens`
|
|
522
|
-
*
|
|
523
|
-
* @throws 无有效 choice 时抛出 Error
|
|
878
|
+
* 解析 OpenAI 响应(中)/ Parse raw OpenAI response into AIChatResponse (EN).
|
|
524
879
|
*/
|
|
525
880
|
function parseOpenAIResponse(data) {
|
|
526
881
|
const d = data;
|
|
@@ -542,12 +897,7 @@ function parseOpenAIResponse(data) {
|
|
|
542
897
|
};
|
|
543
898
|
}
|
|
544
899
|
/**
|
|
545
|
-
*
|
|
546
|
-
*
|
|
547
|
-
* 三种特殊消息的处理:
|
|
548
|
-
* 1. tool 消息(工具结果)→ 每个结果拆分为单独的 `role: "tool"` 消息
|
|
549
|
-
* 2. assistant 含 toolCalls → 附带 `tool_calls` 字段
|
|
550
|
-
* 3. 其他消息 → 直接映射 role + content
|
|
900
|
+
* 消息转换(中)/ Convert unified messages to OpenAI format (EN).
|
|
551
901
|
*/
|
|
552
902
|
function convertMessages$1(systemPrompt, messages) {
|
|
553
903
|
const result = [{
|
|
@@ -577,26 +927,56 @@ function convertMessages$1(systemPrompt, messages) {
|
|
|
577
927
|
});
|
|
578
928
|
return result;
|
|
579
929
|
}
|
|
930
|
+
/**
 * Parse an OpenAI-style SSE chat-completion stream into the unified response shape.
 *
 * Text deltas are concatenated; tool-call deltas are merged per `index` and
 * their argument fragments are JSON-parsed once the stream has ended.
 *
 * @param {Response} response - Streaming fetch response. Without a body it is
 *   parsed as a regular JSON response via parseOpenAIResponse.
 * @param {number} [readTimeoutMs=20000] - Per-read timeout passed to consumeSSEJSON.
 * @returns {Promise<{ text: string | undefined, toolCalls: Array<{ id: string, name: string, input: unknown }> | undefined, usage: { inputTokens: number, outputTokens: number } | undefined }>}
 */
async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
	if (!response.body) return parseOpenAIResponse(await response.json());
	let text = "";
	const toolCallMap = new Map();
	let usage;
	await consumeSSEJSON(response, (event) => {
		const chunk = event;
		const delta = chunk?.choices?.[0]?.delta;
		if (delta?.content) text += delta.content;
		for (const tc of delta?.tool_calls ?? []) {
			const idx = tc.index ?? 0;
			const existing = toolCallMap.get(idx);
			if (!existing) {
				toolCallMap.set(idx, {
					id: tc.id ?? "",
					name: tc.function?.name ?? "",
					arguments: tc.function?.arguments ?? ""
				});
				continue;
			}
			// Keep an id/name that only shows up in a later delta instead of losing it.
			if (!existing.id && tc.id) existing.id = tc.id;
			if (!existing.name && tc.function?.name) existing.name = tc.function.name;
			if (tc.function?.arguments) existing.arguments += tc.function.arguments;
		}
		if (chunk?.usage) usage = {
			inputTokens: chunk.usage.prompt_tokens ?? 0,
			outputTokens: chunk.usage.completion_tokens ?? 0
		};
	}, {
		readTimeoutMs,
		stopOnDone: true
	});
	const toolCalls = [];
	const ordered = [...toolCallMap.entries()].sort((a, b) => a[0] - b[0]);
	for (const [, tc] of ordered) {
		try {
			toolCalls.push({
				id: tc.id,
				name: tc.name,
				// A call without arguments streams an empty string; treat it as `{}`
				// (same fallback as parseAnthropicStream) rather than dropping the call.
				input: JSON.parse(tc.arguments || "{}")
			});
		} catch {
			// Best-effort: a call whose arguments are not valid JSON is skipped.
		}
	}
	return {
		text: text || void 0,
		toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
		usage
	};
}
|
|
580
975
|
|
|
581
976
|
//#endregion
|
|
582
977
|
//#region src/core/ai-client/anthropic.ts
|
|
583
978
|
/**
|
|
584
|
-
*
|
|
585
|
-
*
|
|
586
|
-
* 封装完整的 Anthropic Messages API 调用流程:
|
|
587
|
-
* 1. buildAnthropicRequest() → 构建 HTTP 请求
|
|
588
|
-
* 2. fetch() → 发送请求
|
|
589
|
-
* 3. parseAnthropicResponse() → 解析响应为统一格式
|
|
590
|
-
*
|
|
591
|
-
* 使用示例:
|
|
592
|
-
* ```ts
|
|
593
|
-
* const client = new AnthropicClient({
|
|
594
|
-
* provider: "anthropic",
|
|
595
|
-
* model: "claude-sonnet-4-20250514",
|
|
596
|
-
* apiKey: "sk-ant-xxx",
|
|
597
|
-
* });
|
|
598
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
599
|
-
* ```
|
|
979
|
+
* AnthropicClient 类(中)/ AnthropicClient class (EN).
|
|
600
980
|
*/
|
|
601
981
|
var AnthropicClient = class extends BaseAIClient {
|
|
602
982
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
@@ -604,6 +984,18 @@ var AnthropicClient = class extends BaseAIClient {
|
|
|
604
984
|
constructor(config) {
|
|
605
985
|
super({ chatHandler: async (params) => {
|
|
606
986
|
const req = buildAnthropicRequest(this.config, params);
|
|
987
|
+
if (!(this.config.stream ?? true)) {
|
|
988
|
+
const res = await fetch(req.url, {
|
|
989
|
+
method: req.method,
|
|
990
|
+
headers: req.headers,
|
|
991
|
+
body: req.body
|
|
992
|
+
});
|
|
993
|
+
if (!res.ok) {
|
|
994
|
+
const errText = await res.text();
|
|
995
|
+
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
996
|
+
}
|
|
997
|
+
return parseAnthropicResponse(await res.json());
|
|
998
|
+
}
|
|
607
999
|
const res = await fetch(req.url, {
|
|
608
1000
|
method: req.method,
|
|
609
1001
|
headers: req.headers,
|
|
@@ -613,22 +1005,14 @@ var AnthropicClient = class extends BaseAIClient {
|
|
|
613
1005
|
const errText = await res.text();
|
|
614
1006
|
throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
|
|
615
1007
|
}
|
|
616
|
-
return parseAnthropicResponse(await res.json());
|
|
1008
|
+
if ((res.headers.get("content-type") ?? "").includes("application/json")) return parseAnthropicResponse(await res.json());
|
|
1009
|
+
return parseAnthropicStream(res);
|
|
617
1010
|
} });
|
|
618
1011
|
this.config = config;
|
|
619
1012
|
}
|
|
620
1013
|
};
|
|
621
1014
|
/**
|
|
622
|
-
*
|
|
623
|
-
*
|
|
624
|
-
* 关键格式差异(与 OpenAI 相比):
|
|
625
|
-
* - system prompt → body.system 字段(非消息数组元素)
|
|
626
|
-
* - 工具定义 → input_schema(而非 parameters)
|
|
627
|
-
* - 工具结果 → user 角色 + tool_result content block
|
|
628
|
-
* - AI 工具调用 → assistant 角色 + tool_use content block
|
|
629
|
-
*
|
|
630
|
-
* max_tokens 策略:opus 模型 16384,其他模型 8192。
|
|
631
|
-
* 认证头使用 `x-api-key`(而非 Authorization Bearer)。
|
|
1015
|
+
* 构建 Anthropic 请求(中)/ Build Anthropic Messages API request (EN).
|
|
632
1016
|
*/
|
|
633
1017
|
function buildAnthropicRequest(config, params) {
|
|
634
1018
|
const baseURL = resolveBaseURL(config);
|
|
@@ -645,6 +1029,7 @@ function buildAnthropicRequest(config, params) {
|
|
|
645
1029
|
system: systemPrompt,
|
|
646
1030
|
messages: anthropicMessages
|
|
647
1031
|
};
|
|
1032
|
+
if (config.stream ?? true) body.stream = true;
|
|
648
1033
|
if (anthropicTools && anthropicTools.length > 0) body.tools = anthropicTools;
|
|
649
1034
|
return {
|
|
650
1035
|
url: `${baseURL}/v1/messages`,
|
|
@@ -658,13 +1043,7 @@ function buildAnthropicRequest(config, params) {
|
|
|
658
1043
|
};
|
|
659
1044
|
}
|
|
660
1045
|
/**
|
|
661
|
-
*
|
|
662
|
-
*
|
|
663
|
-
* Anthropic 使用 content block 数组返回多种内容:
|
|
664
|
-
* - type="text" → 文本回复(可能多个,合并为一个字符串)
|
|
665
|
-
* - type="tool_use" → 工具调用(id + name + input)
|
|
666
|
-
*
|
|
667
|
-
* Token 用量字段名也不同:input_tokens / output_tokens(非 prompt_tokens)。
|
|
1046
|
+
* 解析 Anthropic 响应(中)/ Parse raw Anthropic response (EN).
|
|
668
1047
|
*/
|
|
669
1048
|
function parseAnthropicResponse(data) {
|
|
670
1049
|
const d = data;
|
|
@@ -684,12 +1063,7 @@ function parseAnthropicResponse(data) {
|
|
|
684
1063
|
};
|
|
685
1064
|
}
|
|
686
1065
|
/**
|
|
687
|
-
*
|
|
688
|
-
*
|
|
689
|
-
* 关键差异处理:
|
|
690
|
-
* 1. 过滤 system 消息(Anthropic 通过 body.system 传入)
|
|
691
|
-
* 2. tool 角色消息 → user 角色 + tool_result content block
|
|
692
|
-
* 3. assistant 含 toolCalls → text + tool_use content blocks
|
|
1066
|
+
* 消息格式转换(中)/ Convert unified messages to Anthropic format (EN).
|
|
693
1067
|
*/
|
|
694
1068
|
function convertMessages(messages) {
|
|
695
1069
|
return messages.filter((m) => m.role !== "system").map((m) => {
|
|
@@ -724,64 +1098,80 @@ function convertMessages(messages) {
|
|
|
724
1098
|
};
|
|
725
1099
|
});
|
|
726
1100
|
}
|
|
1101
|
+
/**
 * Parse an Anthropic Messages SSE stream into the unified response shape.
 *
 * Text deltas are concatenated, `tool_use` blocks are assembled from their
 * `input_json_delta` fragments, and token usage is taken from the
 * `message_start` / `message_delta` events.
 *
 * @param {Response} response - Streaming fetch response. Without a body it is
 *   parsed as a regular JSON response via parseAnthropicResponse.
 * @param {number} [readTimeoutMs] - Optional per-read timeout passed to
 *   consumeSSEJSON; omitted means no timeout.
 * @returns {Promise<{ text: string | undefined, toolCalls: Array<{ id: string, name: string, input: unknown }> | undefined, usage: { inputTokens: number, outputTokens: number } | undefined }>}
 * @throws {Error} When the stream carries an `error` event.
 */
async function parseAnthropicStream(response, readTimeoutMs) {
	if (!response.body) return parseAnthropicResponse(await response.json());
	let text = "";
	const toolCalls = [];
	let currentToolUse = null;
	let inputTokens = 0;
	let outputTokens = 0;
	let streamError = null;
	await consumeSSEJSON(response, (event) => {
		// Once an error event has been seen, ignore anything that follows.
		if (streamError) return false;
		switch (event?.type) {
			case "message_start":
				inputTokens = event.message?.usage?.input_tokens ?? 0;
				break;
			case "content_block_start": {
				const block = event.content_block;
				if (block?.type === "tool_use") currentToolUse = {
					id: block.id ?? "",
					name: block.name ?? "",
					inputJson: ""
				};
				break;
			}
			case "content_block_delta": {
				const delta = event.delta;
				if (delta?.type === "text_delta") text += delta.text ?? "";
				else if (delta?.type === "input_json_delta" && currentToolUse) currentToolUse.inputJson += delta.partial_json ?? "";
				break;
			}
			case "content_block_stop":
				if (currentToolUse) {
					try {
						toolCalls.push({
							id: currentToolUse.id,
							name: currentToolUse.name,
							input: JSON.parse(currentToolUse.inputJson || "{}")
						});
					} catch {
						// Best-effort: a tool call whose input is not valid JSON is skipped.
					}
					currentToolUse = null;
				}
				break;
			case "message_delta":
				// Keep the last known count when a delta carries no usage figure.
				outputTokens = event.usage?.output_tokens ?? outputTokens;
				break;
			case "error":
				// Recorded here and raised after consumption, because exceptions
				// thrown inside the handler are swallowed by consumeSSEJSON.
				streamError = event.error ?? {};
				return false;
		}
	}, {
		stopOnDone: false,
		readTimeoutMs
	});
	if (streamError) throw new Error(`AI API stream error: ${streamError.type ?? "unknown"}: ${streamError.message ?? ""}`);
	return {
		text: text || void 0,
		toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
		usage: inputTokens > 0 || outputTokens > 0 ? {
			inputTokens,
			outputTokens
		} : void 0
	};
}
|
|
727
1157
|
|
|
728
1158
|
//#endregion
|
|
729
1159
|
//#region src/core/ai-client/deepseek.ts
|
|
730
1160
|
/**
|
|
731
|
-
* DeepSeek
|
|
732
|
-
*
|
|
733
|
-
* DeepSeek 使用 OpenAI 兼容的 Chat Completions API 格式,
|
|
734
|
-
* 因此直接继承 OpenAIClient,复用请求构建和响应解析逻辑。
|
|
735
|
-
*
|
|
736
|
-
* 差异点(相对于 OpenAI):
|
|
737
|
-
* - 端点:https://api.deepseek.com(Constants 中配置)
|
|
738
|
-
* - 模型:deepseek-chat(V3)、deepseek-reasoner(R1)等
|
|
739
|
-
* - 认证:Authorization: Bearer <API Key>(与 OpenAI 相同)
|
|
740
|
-
* - tool_calls 格式与 OpenAI 完全一致
|
|
741
|
-
*
|
|
742
|
-
* 继承关系:
|
|
743
|
-
* BaseAIClient(custom.ts)
|
|
744
|
-
* └── OpenAIClient(openai.ts)
|
|
745
|
-
* └── DeepSeekClient(本文件)— 可覆盖默认参数
|
|
746
|
-
*
|
|
747
|
-
* 使用示例:
|
|
748
|
-
* ```ts
|
|
749
|
-
* const client = new DeepSeekClient({
|
|
750
|
-
* provider: "deepseek",
|
|
751
|
-
* model: "deepseek-chat",
|
|
752
|
-
* apiKey: "sk-xxx",
|
|
753
|
-
* });
|
|
754
|
-
* const response = await client.chat({ systemPrompt, messages, tools });
|
|
755
|
-
* ```
|
|
1161
|
+
* DeepSeek 客户端封装(中)/ DeepSeek client wrapper (EN).
|
|
756
1162
|
*
|
|
757
|
-
*
|
|
758
|
-
* -
|
|
759
|
-
* - Chat API: https://api-docs.deepseek.com/zh-cn/api/create-chat-completion/
|
|
1163
|
+
* DeepSeek 与 OpenAI Chat Completions 兼容,直接复用 OpenAIClient。
|
|
1164
|
+
* DeepSeek is OpenAI-compatible, so it reuses OpenAIClient behavior.
|
|
760
1165
|
*/
|
|
761
1166
|
/**
 * DeepSeek client class.
 *
 * Extends OpenAIClient without overriding anything, so it behaves exactly
 * like OpenAIClient.
 */
var DeepSeekClient = class extends OpenAIClient {};
|
|
771
1170
|
|
|
772
1171
|
//#endregion
|
|
773
1172
|
//#region src/core/ai-client/index.ts
|
|
774
1173
|
/**
|
|
775
|
-
* 创建 AI
|
|
776
|
-
*
|
|
777
|
-
* 根据 provider 自动创建对应的客户端类实例:
|
|
778
|
-
* - openai / copilot → new OpenAIClient(config)
|
|
779
|
-
* - anthropic → new AnthropicClient(config)
|
|
780
|
-
*
|
|
781
|
-
* 返回 AIClient 接口,调用 chat() 即可与 AI 对话。
|
|
782
|
-
*
|
|
783
|
-
* @param config - 包含 provider、model、apiKey 等配置
|
|
784
|
-
* @returns AIClient 实例(OpenAIClient 或 AnthropicClient)
|
|
1174
|
+
* 创建 AI 客户端(中)/ Create AI client by provider (EN).
|
|
785
1175
|
*/
|
|
786
1176
|
function createAIClient(config) {
|
|
787
1177
|
validateProvider(config.provider);
|
|
@@ -849,31 +1239,78 @@ var ToolRegistry = class {
|
|
|
849
1239
|
//#endregion
|
|
850
1240
|
//#region src/core/system-prompt.ts
|
|
851
1241
|
/**
 * Normalize additional prompt instructions into a clean list of strings.
 *
 * @param {string | string[] | null | undefined} input - A single instruction or a list of them.
 * @returns {string[]} Trimmed, non-empty instructions in their original order.
 *   Falsy input yields an empty list; non-string entries are skipped.
 */
function normalizeExtraInstructions(input) {
	if (!input) return [];
	const entries = Array.isArray(input) ? input : [input];
	// Skip non-strings up front: calling `.trim()` on null or a number would throw.
	return entries.filter((entry) => typeof entry === "string").map((entry) => entry.trim()).filter(Boolean);
}
|
|
1248
|
+
/**
 * Build the system prompt sent to the model.
 *
 * The prompt is a list of sections joined by blank lines: the fixed core
 * rules, then (each only when applicable) the available tools, the reasoning
 * profile and any extra instructions.
 *
 * Constraints:
 * - Prompt text sent to the model stays English-only.
 *
 * @param params - Optional `tools` ({ name, description }[]), `thinkingLevel`
 *   and `extraInstructions` (string or string[]).
 * @returns {string} The assembled prompt.
 */
function buildSystemPrompt(params = {}) {
	const corePrompt = [
		"You are AutoPilot, an AI agent controlling the current web page via tools.",
		"",
		"## Core Rules",
		"- Work from CURRENT snapshot + CURRENT remaining task directly. Do not restate the request.",
		"- Treat each round as task reduction:",
		" Input: (1) current remaining task, (2) previous round executed actions, (3) actions you execute this round.",
		" Output: new remaining task after removing this-round actions.",
		"- Use only visible targets from snapshot. Use #hashID as selector. Do not guess CSS selectors.",
		"- Batch independent visible actions in one round. Do not split one form into many rounds unnecessarily.",
		"- If an action will change DOM (open modal, navigate), stop after that action batch and continue next round with new snapshot.",
		"- Do NOT call page_info (snapshot/query/get_url/get_title). Snapshot is already provided every round.",
		"- For dropdown/select, use dom action=select_option (or fill on select).",
		"- Do NOT interact with AutoPilot UI unless user explicitly asks.",
		"",
		"## Output Contract",
		"- Return tool calls for this round.",
		"- Also include one plain text line:",
		" REMAINING: <new remaining task after this round>",
		" or REMAINING: DONE",
		"",
		"## Minimal Example",
		'Task: click button -> type "abc" in input -> send',
		"Round1 execute: click button",
		'Remaining: type "abc" in input -> send',
		'Round2 execute: type "abc" in input',
		"Remaining: send",
		"Round3 execute: send",
		"Remaining: DONE"
	].join("\n");

	// Optional sections stay null when they do not apply and are dropped below.
	const toolList = params.tools ?? [];
	const toolsSection = toolList.length > 0
		? `## Available Tools\n\n${toolList.map((t) => `- **${t.name}**: ${t.description}`).join("\n")}\n\nUse tools when needed to complete the user's request.`
		: null;

	const reasoningSection = params.thinkingLevel
		? `## Reasoning Profile\n- Thinking level: ${params.thinkingLevel}`
		: null;

	const extras = normalizeExtraInstructions(params.extraInstructions);
	const extrasSection = extras.length > 0
		? ["## Extra Instructions", ...extras.map((line) => `- ${line}`)].join("\n")
		: null;

	return [corePrompt, toolsSection, reasoningSection, extrasSection]
		.filter((section) => section !== null)
		.join("\n\n");
}
|
|
865
1301
|
|
|
866
1302
|
//#endregion
|
|
867
|
-
//#region src/web/dom-tool.ts
|
|
1303
|
+
//#region src/web/tools/dom-tool.ts
|
|
868
1304
|
/**
|
|
869
1305
|
* DOM Tool — 基于 Web API 的 DOM 操作工具。
|
|
870
1306
|
*
|
|
871
1307
|
* 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
|
|
872
1308
|
* 运行环境:浏览器 Content Script。
|
|
873
1309
|
*
|
|
874
|
-
* 支持
|
|
1310
|
+
* 支持 12 种动作:
|
|
875
1311
|
* click — 点击元素
|
|
876
|
-
* fill —
|
|
1312
|
+
* fill — 填写可编辑控件(input/textarea/select/contenteditable)
|
|
1313
|
+
* select_option — 选择下拉框选项(value/label)
|
|
877
1314
|
* type — 逐字符模拟键入
|
|
878
1315
|
* focus — 聚焦元素
|
|
879
1316
|
* hover — 鼠标悬停(触发 mouseenter/mouseover)
|
|
@@ -910,7 +1347,7 @@ function queryElement(selector) {
|
|
|
910
1347
|
const el = document.querySelector(selector);
|
|
911
1348
|
if (!el) return `未找到匹配 "${selector}" 的元素`;
|
|
912
1349
|
return el;
|
|
913
|
-
} catch
|
|
1350
|
+
} catch {
|
|
914
1351
|
return `选择器语法错误: ${selector}`;
|
|
915
1352
|
}
|
|
916
1353
|
}
|
|
@@ -988,11 +1425,11 @@ function createDomTool() {
|
|
|
988
1425
|
name: "dom",
|
|
989
1426
|
description: [
|
|
990
1427
|
"Perform DOM operations on the current page.",
|
|
991
|
-
"Actions: click, fill, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
1428
|
+
"Actions: click, fill, select_option, type, focus, hover, press, get_text, get_attr, set_attr, add_class, remove_class.",
|
|
992
1429
|
"Use the hash ID from DOM snapshot (e.g. #a1b2c) as selector."
|
|
993
1430
|
].join(" "),
|
|
994
1431
|
schema: Type.Object({
|
|
995
|
-
action: Type.String({ description: "DOM action: click | fill | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
|
|
1432
|
+
action: Type.String({ description: "DOM action: click | fill | select_option | type | focus | hover | press | get_text | get_attr | set_attr | add_class | remove_class" }),
|
|
996
1433
|
selector: Type.String({ description: "Element ref ID from snapshot (e.g. #r0, #r5) or CSS selector" }),
|
|
997
1434
|
value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
|
|
998
1435
|
key: Type.Optional(Type.String({ description: "Key name for press action (e.g. Enter, Escape, Tab, ArrowDown, ArrowUp, Backspace, Delete, Space)" })),
|
|
@@ -1046,6 +1483,15 @@ function createDomTool() {
|
|
|
1046
1483
|
try {
|
|
1047
1484
|
switch (action) {
|
|
1048
1485
|
case "click":
|
|
1486
|
+
if (el instanceof HTMLOptionElement) {
|
|
1487
|
+
const parent = el.parentElement;
|
|
1488
|
+
if (parent instanceof HTMLSelectElement) {
|
|
1489
|
+
parent.focus();
|
|
1490
|
+
parent.value = el.value;
|
|
1491
|
+
dispatchInputEvents(parent);
|
|
1492
|
+
return { content: `已选择 ${describeElement(parent)} 的选项 "${el.value}"` };
|
|
1493
|
+
}
|
|
1494
|
+
}
|
|
1049
1495
|
if (el instanceof HTMLElement) {
|
|
1050
1496
|
el.focus();
|
|
1051
1497
|
el.click();
|
|
@@ -1091,6 +1537,24 @@ function createDomTool() {
|
|
|
1091
1537
|
el.focus();
|
|
1092
1538
|
el.value = value;
|
|
1093
1539
|
dispatchInputEvents(el);
|
|
1540
|
+
} else if (el instanceof HTMLSelectElement) {
|
|
1541
|
+
el.focus();
|
|
1542
|
+
let matched = false;
|
|
1543
|
+
for (const option of Array.from(el.options)) if (option.value === value) {
|
|
1544
|
+
el.value = option.value;
|
|
1545
|
+
matched = true;
|
|
1546
|
+
break;
|
|
1547
|
+
}
|
|
1548
|
+
if (!matched) {
|
|
1549
|
+
const normalized = value.trim().toLowerCase();
|
|
1550
|
+
for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
|
|
1551
|
+
el.value = option.value;
|
|
1552
|
+
matched = true;
|
|
1553
|
+
break;
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
|
|
1557
|
+
dispatchInputEvents(el);
|
|
1094
1558
|
} else if (el instanceof HTMLElement && el.isContentEditable) {
|
|
1095
1559
|
el.focus();
|
|
1096
1560
|
el.textContent = value;
|
|
@@ -1098,6 +1562,29 @@ function createDomTool() {
|
|
|
1098
1562
|
} else return { content: `"${selector}" 不是可编辑元素` };
|
|
1099
1563
|
return { content: `已填写 ${describeElement(el)}: "${value}"` };
|
|
1100
1564
|
}
|
|
1565
|
+
case "select_option": {
|
|
1566
|
+
const value = params.value;
|
|
1567
|
+
if (value === void 0) return { content: "缺少 value 参数" };
|
|
1568
|
+
if (!(el instanceof HTMLSelectElement)) return { content: `"${selector}" 不是下拉框元素` };
|
|
1569
|
+
el.focus();
|
|
1570
|
+
let matched = false;
|
|
1571
|
+
for (const option of Array.from(el.options)) if (option.value === value) {
|
|
1572
|
+
el.value = option.value;
|
|
1573
|
+
matched = true;
|
|
1574
|
+
break;
|
|
1575
|
+
}
|
|
1576
|
+
if (!matched) {
|
|
1577
|
+
const normalized = value.trim().toLowerCase();
|
|
1578
|
+
for (const option of Array.from(el.options)) if (option.text.trim().toLowerCase() === normalized) {
|
|
1579
|
+
el.value = option.value;
|
|
1580
|
+
matched = true;
|
|
1581
|
+
break;
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
1584
|
+
if (!matched) return { content: `"${selector}" 下拉框中不存在选项 "${value}"` };
|
|
1585
|
+
dispatchInputEvents(el);
|
|
1586
|
+
return { content: `已选择 ${describeElement(el)}: "${el.value}"` };
|
|
1587
|
+
}
|
|
1101
1588
|
case "type": {
|
|
1102
1589
|
const value = params.value;
|
|
1103
1590
|
if (value === void 0) return { content: "缺少 value 参数" };
|
|
@@ -1166,7 +1653,7 @@ function createDomTool() {
|
|
|
1166
1653
|
}
|
|
1167
1654
|
|
|
1168
1655
|
//#endregion
|
|
1169
|
-
//#region src/web/page-info-tool.ts
|
|
1656
|
+
//#region src/web/tools/page-info-tool.ts
|
|
1170
1657
|
/**
|
|
1171
1658
|
* Page Info Tool — 基于 Web API 的页面信息获取工具。
|
|
1172
1659
|
*
|
|
@@ -1206,6 +1693,11 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1206
1693
|
const maxDepth = opts.maxDepth ?? 6;
|
|
1207
1694
|
const viewportOnly = opts.viewportOnly ?? true;
|
|
1208
1695
|
const pruneLayout = opts.pruneLayout ?? true;
|
|
1696
|
+
const maxNodes = opts.maxNodes ?? 220;
|
|
1697
|
+
const maxChildren = opts.maxChildren ?? 25;
|
|
1698
|
+
const maxTextLength = opts.maxTextLength ?? 40;
|
|
1699
|
+
let emittedNodes = 0;
|
|
1700
|
+
let truncatedByNodeBudget = false;
|
|
1209
1701
|
const refStore = opts.refStore;
|
|
1210
1702
|
const SKIP_TAGS = new Set([
|
|
1211
1703
|
"SCRIPT",
|
|
@@ -1247,14 +1739,18 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1247
1739
|
"title",
|
|
1248
1740
|
"for",
|
|
1249
1741
|
"action",
|
|
1250
|
-
"method"
|
|
1251
|
-
"target",
|
|
1252
|
-
"min",
|
|
1253
|
-
"max",
|
|
1254
|
-
"pattern",
|
|
1255
|
-
"maxlength",
|
|
1256
|
-
"tabindex"
|
|
1742
|
+
"method"
|
|
1257
1743
|
];
|
|
1744
|
+
const INTERACTIVE_TAGS = new Set([
|
|
1745
|
+
"A",
|
|
1746
|
+
"BUTTON",
|
|
1747
|
+
"INPUT",
|
|
1748
|
+
"TEXTAREA",
|
|
1749
|
+
"SELECT",
|
|
1750
|
+
"OPTION",
|
|
1751
|
+
"LABEL",
|
|
1752
|
+
"SUMMARY"
|
|
1753
|
+
]);
|
|
1258
1754
|
/** 布尔状态属性 — 只在存在时输出(无值),如 disabled、checked */
|
|
1259
1755
|
const BOOLEAN_ATTRS = [
|
|
1260
1756
|
"disabled",
|
|
@@ -1262,13 +1758,8 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1262
1758
|
"readonly",
|
|
1263
1759
|
"required",
|
|
1264
1760
|
"selected",
|
|
1265
|
-
"hidden"
|
|
1266
|
-
"multiple",
|
|
1267
|
-
"autofocus",
|
|
1268
|
-
"open"
|
|
1761
|
+
"hidden"
|
|
1269
1762
|
];
|
|
1270
|
-
/** 内联事件属性前缀 */
|
|
1271
|
-
const EVENT_PREFIX = "on";
|
|
1272
1763
|
/**
|
|
1273
1764
|
* 计算元素在父节点中同标签兄弟里的序号(1-based,XPath 规范)。
|
|
1274
1765
|
* 如果同标签兄弟只有一个,返回空字符串(无需索引消歧)。
|
|
@@ -1311,9 +1802,22 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1311
1802
|
if (directText) return false;
|
|
1312
1803
|
return true;
|
|
1313
1804
|
}
|
|
1805
|
+
function isInteractiveElement(el) {
|
|
1806
|
+
if (INTERACTIVE_TAGS.has(el.tagName)) return true;
|
|
1807
|
+
if (el.hasAttribute("onclick")) return true;
|
|
1808
|
+
if (el.hasAttribute("role")) return true;
|
|
1809
|
+
if (el.hasAttribute("tabindex")) return true;
|
|
1810
|
+
if (el.hasAttribute("aria-label")) return true;
|
|
1811
|
+
return false;
|
|
1812
|
+
}
|
|
1314
1813
|
function walk(el, depth, parentPath) {
|
|
1814
|
+
if (emittedNodes >= maxNodes) {
|
|
1815
|
+
truncatedByNodeBudget = true;
|
|
1816
|
+
return "";
|
|
1817
|
+
}
|
|
1315
1818
|
if (depth > maxDepth) return "";
|
|
1316
1819
|
if (SKIP_TAGS.has(el.tagName)) return "";
|
|
1820
|
+
if (el.hasAttribute("data-autopilot-ignore")) return "";
|
|
1317
1821
|
const style = window.getComputedStyle(el);
|
|
1318
1822
|
if (style.display === "none" || style.visibility === "hidden") return "";
|
|
1319
1823
|
if (!isInViewport(el, depth)) return "";
|
|
@@ -1325,22 +1829,19 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1325
1829
|
if (elId) attrs.push(`id="${elId}"`);
|
|
1326
1830
|
const className = el.getAttribute("class")?.trim();
|
|
1327
1831
|
if (className) {
|
|
1328
|
-
const
|
|
1329
|
-
if (
|
|
1832
|
+
const cls = className.split(/\s+/).find((c) => c && !c.startsWith("data-v-") && c.length < 25 && !/^[a-z]{1,2}\d|^_|^css-/.test(c));
|
|
1833
|
+
if (cls) attrs.push(`class="${cls}"`);
|
|
1330
1834
|
}
|
|
1331
1835
|
for (const attr of INTERACTIVE_ATTRS) {
|
|
1332
1836
|
const val = el.getAttribute(attr);
|
|
1333
1837
|
if (val) attrs.push(`${attr}="${val}"`);
|
|
1334
1838
|
}
|
|
1335
1839
|
for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
if (
|
|
1339
|
-
const dataAttrs = [];
|
|
1340
|
-
for (const attrObj of Array.from(el.attributes)) if (attrObj.name.startsWith("data-") && !attrObj.name.match(/^data-v-/) && dataAttrs.length < 2) dataAttrs.push(`${attrObj.name}="${attrObj.value.slice(0, 30)}"`);
|
|
1341
|
-
if (dataAttrs.length > 0) attrs.push(...dataAttrs);
|
|
1840
|
+
if (el.hasAttribute("onclick")) attrs.push("onclick");
|
|
1841
|
+
const testId = el.getAttribute("data-testid") || el.getAttribute("data-test-id");
|
|
1842
|
+
if (testId) attrs.push(`data-testid="${testId.slice(0, 25)}"`);
|
|
1342
1843
|
if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.value) {
|
|
1343
|
-
const currentVal = el.value.slice(0,
|
|
1844
|
+
const currentVal = el.value.slice(0, 40);
|
|
1344
1845
|
if (el.getAttribute("value") !== currentVal) attrs.push(`val="${currentVal}"`);
|
|
1345
1846
|
}
|
|
1346
1847
|
let directText = "";
|
|
@@ -1353,28 +1854,45 @@ function generateSnapshot(root = document.body, options = {}) {
|
|
|
1353
1854
|
}
|
|
1354
1855
|
directText = directText.trim();
|
|
1355
1856
|
if (isEmptyLayoutContainer(el, directText)) {
|
|
1857
|
+
const allChildren = Array.from(el.children);
|
|
1858
|
+
const interactiveChildren = allChildren.filter(isInteractiveElement);
|
|
1859
|
+
const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
|
|
1860
|
+
const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
|
|
1861
|
+
const selectedChildren = orderedChildren.slice(0, maxChildren);
|
|
1862
|
+
const omittedChildren = orderedChildren.length - selectedChildren.length;
|
|
1356
1863
|
const childLines = [];
|
|
1357
|
-
for (let i = 0; i <
|
|
1358
|
-
const childResult = walk(
|
|
1864
|
+
for (let i = 0; i < selectedChildren.length; i++) {
|
|
1865
|
+
const childResult = walk(selectedChildren[i], depth, currentPath);
|
|
1359
1866
|
if (childResult) childLines.push(childResult);
|
|
1360
1867
|
}
|
|
1868
|
+
if (omittedChildren > 0) childLines.push(`${" ".repeat(depth)}... (${omittedChildren} children omitted)`);
|
|
1361
1869
|
return childLines.join("\n");
|
|
1362
1870
|
}
|
|
1363
1871
|
let line = `${indent}[${tag}]`;
|
|
1364
|
-
if (directText) line += ` "${directText.slice(0,
|
|
1872
|
+
if (directText) line += ` "${directText.slice(0, maxTextLength)}"`;
|
|
1365
1873
|
if (attrs.length) line += ` ${attrs.join(" ")}`;
|
|
1366
1874
|
if (refStore) {
|
|
1367
1875
|
const hashId = refStore.set(el, currentPath);
|
|
1368
1876
|
line += ` #${hashId}`;
|
|
1369
1877
|
} else line += ` ref="${currentPath}"`;
|
|
1370
1878
|
const lines = [line];
|
|
1371
|
-
|
|
1372
|
-
|
|
1879
|
+
emittedNodes++;
|
|
1880
|
+
const allChildren = Array.from(el.children);
|
|
1881
|
+
const interactiveChildren = allChildren.filter(isInteractiveElement);
|
|
1882
|
+
const nonInteractiveChildren = allChildren.filter((child) => !isInteractiveElement(child));
|
|
1883
|
+
const orderedChildren = [...interactiveChildren, ...nonInteractiveChildren];
|
|
1884
|
+
const selectedChildren = orderedChildren.slice(0, maxChildren);
|
|
1885
|
+
const omittedChildren = orderedChildren.length - selectedChildren.length;
|
|
1886
|
+
for (let i = 0; i < selectedChildren.length; i++) {
|
|
1887
|
+
const childResult = walk(selectedChildren[i], depth + 1, currentPath);
|
|
1373
1888
|
if (childResult) lines.push(childResult);
|
|
1374
1889
|
}
|
|
1890
|
+
if (omittedChildren > 0) lines.push(`${indent} ... (${omittedChildren} children omitted)`);
|
|
1375
1891
|
return lines.join("\n");
|
|
1376
1892
|
}
|
|
1377
|
-
|
|
1893
|
+
const output = walk(root, 0, "") || "(空页面)";
|
|
1894
|
+
if (!truncatedByNodeBudget) return output;
|
|
1895
|
+
return `${output}\n... (snapshot truncated: maxNodes=${maxNodes})`;
|
|
1378
1896
|
}
|
|
1379
1897
|
/**
|
|
1380
1898
|
* 查询所有匹配元素并返回摘要信息(标签、文本、关键属性)。
|
|
@@ -1395,7 +1913,7 @@ function queryAllElements(selector, limit = 20) {
|
|
|
1395
1913
|
}
|
|
1396
1914
|
if (elements.length > limit) results.push(` ...还有 ${elements.length - limit} 个元素`);
|
|
1397
1915
|
return results.join("\n");
|
|
1398
|
-
} catch
|
|
1916
|
+
} catch {
|
|
1399
1917
|
return `选择器语法错误: ${selector}`;
|
|
1400
1918
|
}
|
|
1401
1919
|
}
|
|
@@ -1412,7 +1930,10 @@ function createPageInfoTool() {
|
|
|
1412
1930
|
selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
|
|
1413
1931
|
maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
|
|
1414
1932
|
viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
|
|
1415
|
-
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" }))
|
|
1933
|
+
pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" })),
|
|
1934
|
+
maxNodes: Type.Optional(Type.Number({ description: "Maximum nodes to include in snapshot (default: 220)" })),
|
|
1935
|
+
maxChildren: Type.Optional(Type.Number({ description: "Maximum children per element (default: 25)" })),
|
|
1936
|
+
maxTextLength: Type.Optional(Type.Number({ description: "Maximum text length per node (default: 40)" }))
|
|
1416
1937
|
}),
|
|
1417
1938
|
execute: async (params) => {
|
|
1418
1939
|
const action = params.action;
|
|
@@ -1436,10 +1957,16 @@ function createPageInfoTool() {
|
|
|
1436
1957
|
const maxDepth = params.maxDepth ?? 6;
|
|
1437
1958
|
const viewportOnly = params.viewportOnly ?? true;
|
|
1438
1959
|
const pruneLayout = params.pruneLayout ?? true;
|
|
1960
|
+
const maxNodes = params.maxNodes ?? 220;
|
|
1961
|
+
const maxChildren = params.maxChildren ?? 25;
|
|
1962
|
+
const maxTextLength = params.maxTextLength ?? 40;
|
|
1439
1963
|
return { content: generateSnapshot(document.body, {
|
|
1440
1964
|
maxDepth,
|
|
1441
1965
|
viewportOnly,
|
|
1442
1966
|
pruneLayout,
|
|
1967
|
+
maxNodes,
|
|
1968
|
+
maxChildren,
|
|
1969
|
+
maxTextLength,
|
|
1443
1970
|
refStore: getActiveRefStore()
|
|
1444
1971
|
}) };
|
|
1445
1972
|
}
|
|
@@ -1464,7 +1991,7 @@ function createPageInfoTool() {
|
|
|
1464
1991
|
}
|
|
1465
1992
|
|
|
1466
1993
|
//#endregion
|
|
1467
|
-
//#region src/web/navigate-tool.ts
|
|
1994
|
+
//#region src/web/tools/navigate-tool.ts
|
|
1468
1995
|
/**
|
|
1469
1996
|
* Navigate Tool — 基于 Web API 的页面导航工具。
|
|
1470
1997
|
*
|
|
@@ -1544,7 +2071,7 @@ function createNavigateTool() {
|
|
|
1544
2071
|
}
|
|
1545
2072
|
|
|
1546
2073
|
//#endregion
|
|
1547
|
-
//#region src/web/wait-tool.ts
|
|
2074
|
+
//#region src/web/tools/wait-tool.ts
|
|
1548
2075
|
/**
|
|
1549
2076
|
* Wait Tool — 基于 MutationObserver 的元素等待工具。
|
|
1550
2077
|
*
|
|
@@ -1714,7 +2241,7 @@ function createWaitTool() {
|
|
|
1714
2241
|
}
|
|
1715
2242
|
|
|
1716
2243
|
//#endregion
|
|
1717
|
-
//#region src/web/evaluate-tool.ts
|
|
2244
|
+
//#region src/web/tools/evaluate-tool.ts
|
|
1718
2245
|
/**
|
|
1719
2246
|
* Evaluate Tool — 在页面上下文中执行任意 JavaScript 表达式。
|
|
1720
2247
|
*
|
|
@@ -1734,7 +2261,7 @@ function createWaitTool() {
|
|
|
1734
2261
|
function safeEvaluate(expression) {
|
|
1735
2262
|
try {
|
|
1736
2263
|
return { result: new Function(`"use strict"; return (${expression});`)() };
|
|
1737
|
-
} catch
|
|
2264
|
+
} catch {
|
|
1738
2265
|
try {
|
|
1739
2266
|
return { result: new Function(`"use strict"; ${expression}`)() };
|
|
1740
2267
|
} catch (err2) {
|
|
@@ -2000,6 +2527,7 @@ var WebAgent = class {
|
|
|
2000
2527
|
provider;
|
|
2001
2528
|
model;
|
|
2002
2529
|
baseURL;
|
|
2530
|
+
stream;
|
|
2003
2531
|
dryRun;
|
|
2004
2532
|
maxRounds;
|
|
2005
2533
|
customSystemPrompt;
|
|
@@ -2021,8 +2549,9 @@ var WebAgent = class {
|
|
|
2021
2549
|
this.provider = options.provider ?? "copilot";
|
|
2022
2550
|
this.model = options.model ?? "gpt-4o";
|
|
2023
2551
|
this.baseURL = options.baseURL;
|
|
2552
|
+
this.stream = options.stream ?? true;
|
|
2024
2553
|
this.dryRun = options.dryRun ?? false;
|
|
2025
|
-
this.maxRounds = options.maxRounds ??
|
|
2554
|
+
this.maxRounds = options.maxRounds ?? 40;
|
|
2026
2555
|
this.customSystemPrompt = options.systemPrompt;
|
|
2027
2556
|
this.memory = options.memory ?? false;
|
|
2028
2557
|
this.autoSnapshot = options.autoSnapshot ?? true;
|
|
@@ -2065,6 +2594,14 @@ var WebAgent = class {
|
|
|
2065
2594
|
setModel(model) {
|
|
2066
2595
|
this.model = model;
|
|
2067
2596
|
}
|
|
2597
|
+
/** 设置是否启用流式输出(SSE) */
|
|
2598
|
+
setStream(enabled) {
|
|
2599
|
+
this.stream = enabled;
|
|
2600
|
+
}
|
|
2601
|
+
/** 获取当前流式输出开关状态 */
|
|
2602
|
+
getStream() {
|
|
2603
|
+
return this.stream;
|
|
2604
|
+
}
|
|
2068
2605
|
/** 切换干运行模式 */
|
|
2069
2606
|
setDryRun(enabled) {
|
|
2070
2607
|
this.dryRun = enabled;
|
|
@@ -2116,14 +2653,19 @@ var WebAgent = class {
|
|
|
2116
2653
|
let systemPrompt = this.customSystemPrompt ?? buildSystemPrompt({ tools: this.registry.getDefinitions() });
|
|
2117
2654
|
const refStore = new RefStore(globalThis.location?.href);
|
|
2118
2655
|
setActiveRefStore(refStore);
|
|
2119
|
-
|
|
2656
|
+
let initialSnapshot;
|
|
2657
|
+
try {
|
|
2120
2658
|
const snapshot = generateSnapshot(document.body, {
|
|
2121
2659
|
maxDepth: 8,
|
|
2660
|
+
viewportOnly: false,
|
|
2661
|
+
maxNodes: 500,
|
|
2662
|
+
maxChildren: 30,
|
|
2122
2663
|
...this.snapshotOptions,
|
|
2123
2664
|
refStore
|
|
2124
2665
|
});
|
|
2125
|
-
|
|
2126
|
-
|
|
2666
|
+
initialSnapshot = snapshot;
|
|
2667
|
+
if (this.autoSnapshot) this.callbacks.onSnapshot?.(snapshot);
|
|
2668
|
+
systemPrompt += wrapSnapshot(`\n\n## DOM Snapshot\n\`\`\`\n${snapshot}\n\`\`\``);
|
|
2127
2669
|
} catch {}
|
|
2128
2670
|
const wrappedCallbacks = {
|
|
2129
2671
|
...this.callbacks,
|
|
@@ -2138,6 +2680,7 @@ var WebAgent = class {
|
|
|
2138
2680
|
registry: this.registry,
|
|
2139
2681
|
systemPrompt,
|
|
2140
2682
|
message,
|
|
2683
|
+
initialSnapshot,
|
|
2141
2684
|
history: this.memory ? this.history : void 0,
|
|
2142
2685
|
dryRun: this.dryRun,
|
|
2143
2686
|
maxRounds: this.maxRounds,
|
|
@@ -2159,7 +2702,8 @@ var WebAgent = class {
|
|
|
2159
2702
|
provider: this.provider,
|
|
2160
2703
|
model: this.model,
|
|
2161
2704
|
apiKey: this.token,
|
|
2162
|
-
baseURL: this.baseURL
|
|
2705
|
+
baseURL: this.baseURL,
|
|
2706
|
+
stream: this.stream
|
|
2163
2707
|
});
|
|
2164
2708
|
}
|
|
2165
2709
|
};
|