agentpage 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +331 -0
- package/dist/index.d.mts +320 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1987 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +53 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1987 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
|
|
3
|
+
//#region src/core/agent-loop/constants.ts
|
|
4
|
+
/**
 * Default configuration constants for the agent loop.
 *
 * They live together here so the main loop does not contain scattered
 * "magic numbers".
 */
/** Default upper bound on chat rounds when the caller does not pass `maxRounds`. */
const DEFAULT_MAX_ROUNDS = 10;
/** Fallback wait (in milliseconds) before re-reading the page during recovery. */
const DEFAULT_RECOVERY_WAIT_MS = 1000;
/** Maximum number of automatic recovery attempts for one identical tool call. */
const DEFAULT_ACTION_RECOVERY_ROUNDS = 5;
|
|
12
|
+
|
|
13
|
+
//#endregion
|
|
14
|
+
//#region src/core/agent-loop/helpers.ts
|
|
15
|
+
/**
 * Asynchronous sleep; awaiting it keeps recovery retries strictly sequential.
 *
 * @param ms - Delay in milliseconds, handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep$1(ms) {
	return new Promise((wake) => {
		setTimeout(wake, ms);
	});
}
|
|
19
|
+
/**
 * Normalise tool result content to a string so it can be spliced into messages.
 *
 * - Strings are returned unchanged.
 * - Everything else is pretty-printed as JSON (2-space indent).
 * - Values JSON cannot represent (`undefined`, functions, symbols) or cannot
 *   serialise (circular structures, BigInt) fall back to `String(content)`,
 *   so the return value is always a string and this helper never throws.
 *
 * @param content - Arbitrary tool result content.
 * @returns A string representation of `content`.
 */
function toContentString(content) {
	if (typeof content === "string") return content;
	let serialized;
	try {
		serialized = JSON.stringify(content, null, 2);
	} catch {
		// JSON.stringify throws on circular references and BigInt values.
		return String(content);
	}
	// JSON.stringify yields `undefined` (not a string) for undefined/functions/symbols.
	return serialized ?? String(content);
}
|
|
23
|
+
/**
 * Decide whether a tool failure means "element does not exist"; the agent
 * loop uses this to trigger snapshot-based recovery.
 *
 * A result matches when either:
 * - `result.details` is an object whose `code` is `"ELEMENT_NOT_FOUND"`, or
 * - the stringified content contains both "未找到" and "元素".
 *
 * @param result - Tool result with `content` and optional `details`.
 * @returns `true` when the result looks like an element-not-found failure.
 */
function isElementNotFoundResult(result) {
	const { details } = result;
	const hasStructuredCode = Boolean(details) && typeof details === "object" && details.code === "ELEMENT_NOT_FOUND";
	if (hasStructuredCode) return true;
	// Fall back to matching the textual error message.
	const text = toContentString(result.content);
	return ["未找到", "元素"].every((needle) => text.includes(needle));
}
|
|
32
|
+
/**
 * Build a stable key for one tool action, used to count recovery retries.
 *
 * @param name - Tool name.
 * @param input - Tool input; serialised with `JSON.stringify`.
 * @returns `"<name>:<json-of-input>"`.
 */
function buildToolCallKey(name, input) {
	const serializedInput = JSON.stringify(input);
	return `${name}:${serializedInput}`;
}
|
|
36
|
+
/**
 * Resolve how long to wait before a recovery retry, in milliseconds.
 *
 * Precedence:
 * 1. `input.waitMs` (already milliseconds)
 * 2. `input.waitSeconds` (converted to milliseconds)
 * 3. `DEFAULT_RECOVERY_WAIT_MS`
 *
 * A candidate is used only if it is a finite number; the result is floored
 * and clamped to be non-negative.
 *
 * @param input - Tool input that may carry `waitMs` / `waitSeconds`.
 * @returns Non-negative integer wait time in milliseconds.
 */
function resolveRecoveryWaitMs(input) {
	if (input === null || typeof input !== "object") return DEFAULT_RECOVERY_WAIT_MS;
	// [property name, factor that converts the value to milliseconds], in priority order.
	const sources = [["waitMs", 1], ["waitSeconds", 1e3]];
	for (const [field, toMs] of sources) {
		const raw = input[field];
		if (typeof raw === "number" && Number.isFinite(raw)) {
			return Math.max(0, Math.floor(raw * toMs));
		}
	}
	return DEFAULT_RECOVERY_WAIT_MS;
}
|
|
51
|
+
/**
 * Compress a tool input into a short text fragment for the tool trace.
 *
 * Only a fixed set of keys is shown, in fixed order. String values are
 * JSON-quoted and cut to 80 characters; numbers and booleans are printed
 * as-is; every other value type is skipped.
 *
 * @param input - Tool input object (anything else yields an empty string).
 * @returns `" (key=value, ...)"`, or `""` when nothing is worth showing.
 */
function formatToolInputBrief(input) {
	if (input === null || typeof input !== "object") return "";
	const briefKeys = ["action", "selector", "waitMs", "waitSeconds", "url", "text"];
	const rendered = briefKeys.flatMap((key) => {
		const value = input[key];
		switch (typeof value) {
			case "string":
				return [`${key}=${JSON.stringify(value).slice(0, 80)}`];
			case "number":
			case "boolean":
				return [`${key}=${String(value)}`];
			default:
				// undefined, null, objects, etc. are omitted.
				return [];
		}
	});
	return rendered.length > 0 ? ` (${rendered.join(", ")})` : "";
}
|
|
72
|
+
/**
 * Format the full tool trace as readable, numbered text.
 *
 * An optional "current step" can be appended as the last line so recovery
 * prompts can highlight the action that just failed.
 *
 * @param trace - Recorded steps (`round`, `name`, `input`, `result`, optional `marker`).
 * @param current - Optional in-flight step; its `result` may be absent.
 * @returns One line per step joined by newlines, or a placeholder when empty.
 */
function buildToolTrace(trace, current) {
	// Render a single numbered line; `details` is passed in because recorded
	// steps and the current step read it with different null-safety.
	const renderStep = (position, step, details) => {
		const code = details && typeof details === "object" ? details.code : void 0;
		const codeSuffix = typeof code === "string" ? ` [${code}]` : "";
		const markerSuffix = step.marker ? ` ${step.marker}` : "";
		return `${position}. [round ${step.round}] ${step.name}${formatToolInputBrief(step.input)}${codeSuffix}${markerSuffix}`;
	};
	const lines = trace.map((entry, index) => renderStep(index + 1, entry, entry.result.details));
	if (current) {
		lines.push(renderStep(lines.length + 1, current, current.result?.details));
	}
	if (lines.length === 0) return "(暂无工具执行记录)";
	return lines.join("\n");
}
|
|
91
|
+
/**
 * Read the `action` field from a tool input.
 *
 * @param input - Tool input; non-objects are tolerated.
 * @returns The `action` string, or `undefined` when absent or not a string.
 */
function getToolAction(input) {
	if (input === null || typeof input !== "object") return void 0;
	const { action } = input;
	if (typeof action !== "string") return void 0;
	return action;
}
|
|
97
|
+
/**
 * Tell whether a tool result is flagged as an error.
 *
 * @param result - Tool result with optional `details`.
 * @returns `true` only when `details` is an object with a truthy `error`.
 */
function hasToolError(result) {
	const { details } = result;
	if (!details || typeof details !== "object") return false;
	return Boolean(details.error);
}
|
|
101
|
+
/**
 * Read the current page URL through the `page_info` tool.
 *
 * A failed dispatch (for example `page_info` is not registered, or the tool
 * threw) still produces a *string* error message as `content`. Such results
 * are rejected here so an error message is never mistaken for a URL.
 *
 * @param registry - Object exposing `dispatch(name, input)`.
 * @returns The URL string, or `undefined` when the tool failed or returned
 *          non-string content.
 */
async function readPageUrl(registry) {
	const result = await registry.dispatch("page_info", { action: "get_url" });
	const details = result?.details;
	const failed = Boolean(details && typeof details === "object" && details.error);
	if (failed || typeof result?.content !== "string") return void 0;
	return result.content;
}
|
|
106
|
+
/**
 * Read a snapshot of the current page through the `page_info` tool.
 *
 * @param registry - Object exposing `dispatch(name, input)`.
 * @param maxDepth - Depth limit forwarded to the snapshot action (default 8).
 * @returns The snapshot content converted with `toContentString`.
 */
async function readPageSnapshot(registry, maxDepth = 8) {
	const request = {
		action: "snapshot",
		maxDepth
	};
	const snapshot = await registry.dispatch("page_info", request);
	return toContentString(snapshot.content);
}
|
|
113
|
+
|
|
114
|
+
//#endregion
|
|
115
|
+
//#region src/core/agent-loop/index.ts
|
|
116
|
+
/**
 * Run the agent decision loop (environment-agnostic).
 *
 * Flow:
 * 1. Fetch the registered tool definitions.
 * 2. Loop: send messages to the AI → if it returns tool calls, execute them,
 *    feed the results back, and continue.
 * 3. When the AI stops calling tools, return its final reply.
 *
 * Page-awareness built into the loop:
 * - Before every tool call the current URL is read; if it changed (or a
 *   successful `navigate` goto/back/forward/reload happened), the next `dom`
 *   call is NOT executed. Instead a fresh snapshot is returned as an error
 *   result so the model re-locates its target.
 * - A `dom` call failing with "element not found" triggers automatic recovery
 *   (wait, re-snapshot, ask the model to retry), at most
 *   `DEFAULT_ACTION_RECOVERY_ROUNDS` times per identical call.
 *
 * Note: if `maxRounds` is exhausted while the model is still requesting
 * tools, `reply` is the empty string.
 *
 * @param params - `{ client, registry, systemPrompt, message, history?, dryRun?, maxRounds?, callbacks? }`.
 *   `callbacks` may provide `onRound`, `onText`, `onToolCall`, `onToolResult`.
 * @returns `{ reply, toolCalls, messages }` — final text, every executed tool
 *   call with its result, and the full message list including this run.
 */
async function executeAgentLoop(params) {
	const { client, registry, systemPrompt, message, history, dryRun = false, maxRounds = DEFAULT_MAX_ROUNDS, callbacks } = params;
	const tools = registry.getDefinitions();
	const messages = [...history ?? [], {
		role: "user",
		content: message
	}];
	const allToolCalls = [];
	const fullToolTrace = [];
	// Recovery attempts per identical tool call (name + serialised input).
	const actionRecoveryAttempts = /* @__PURE__ */ new Map();
	const pageContext = { needsSnapshotBeforeDom: false };
	let finalReply = "";
	for (let round = 0; round < maxRounds; round++) {
		callbacks?.onRound?.(round);
		const response = await client.chat({
			systemPrompt,
			messages,
			tools
		});
		// No tool calls: this is the final answer.
		if (!response.toolCalls || response.toolCalls.length === 0) {
			finalReply = response.text ?? "";
			if (finalReply) callbacks?.onText?.(finalReply);
			break;
		}
		if (response.text) callbacks?.onText?.(response.text);
		// Dry run: describe the requested calls instead of executing them.
		if (dryRun) {
			finalReply = response.text ? response.text + "\n\n" : "";
			finalReply += "🔧 AI 请求调用以下工具(dry-run 模式,未执行):\n";
			for (const tc of response.toolCalls) {
				callbacks?.onToolCall?.(tc.name, tc.input);
				finalReply += `\n┌─ 工具: ${tc.name}\n`;
				finalReply += `│ ID: ${tc.id}\n`;
				finalReply += `│ 参数:\n`;
				const inputStr = JSON.stringify(tc.input, null, 2);
				for (const line of inputStr.split("\n")) finalReply += `│ ${line}\n`;
				finalReply += `└────────────────────\n`;
			}
			break;
		}
		const toolResults = [];
		// Single place for the bookkeeping every tool outcome needs. Content is
		// always serialised with toContentString so every path sends the same
		// format back to the model.
		const recordOutcome = (tc, result, marker) => {
			allToolCalls.push({
				name: tc.name,
				input: tc.input,
				result
			});
			const traceEntry = {
				round,
				name: tc.name,
				input: tc.input,
				result
			};
			if (marker) traceEntry.marker = marker;
			fullToolTrace.push(traceEntry);
			callbacks?.onToolResult?.(tc.name, result);
			toolResults.push({
				toolCallId: tc.id,
				result: toContentString(result.content)
			});
		};
		for (const tc of response.toolCalls) {
			callbacks?.onToolCall?.(tc.name, tc.input);
			// Detect URL changes that happened since the previous tool call.
			const latestUrl = await readPageUrl(registry);
			if (latestUrl) {
				if (!pageContext.currentUrl) pageContext.currentUrl = latestUrl;
				else if (latestUrl !== pageContext.currentUrl) {
					pageContext.currentUrl = latestUrl;
					pageContext.needsSnapshotBeforeDom = true;
				}
			}
			// The page changed: refuse the DOM action and hand back a fresh snapshot.
			if (tc.name === "dom" && pageContext.needsSnapshotBeforeDom) {
				const snapshotText = await readPageSnapshot(registry, 8);
				pageContext.latestSnapshot = snapshotText;
				pageContext.needsSnapshotBeforeDom = false;
				const result = {
					content: [
						`检测到页面 URL 变化:${pageContext.currentUrl ?? "(未知)"}`,
						"已在执行 DOM 操作前生成最新快照,请基于该快照重新定位目标元素后重试当前工具调用。",
						"",
						"本次对话任务完整工具轨迹:",
						buildToolTrace(fullToolTrace, {
							round,
							name: tc.name,
							input: tc.input,
							marker: "[URL变化待重定位]"
						}),
						"",
						"最新页面快照:",
						snapshotText
					].join("\n"),
					details: {
						error: true,
						code: "URL_CHANGED_REQUIRE_NEW_SNAPSHOT",
						url: pageContext.currentUrl
					}
				};
				recordOutcome(tc, result, "[URL变化待重定位]");
				continue;
			}
			let result = await registry.dispatch(tc.name, tc.input);
			if (tc.name === "dom" && isElementNotFoundResult(result)) {
				const key = buildToolCallKey(tc.name, tc.input);
				const attempts = (actionRecoveryAttempts.get(key) ?? 0) + 1;
				actionRecoveryAttempts.set(key, attempts);
				const recoveryWaitMs = resolveRecoveryWaitMs(tc.input);
				if (attempts <= DEFAULT_ACTION_RECOVERY_ROUNDS) {
					// Give the page time to settle, then let the model retry on a new snapshot.
					await sleep$1(recoveryWaitMs);
					const snapshotText = await readPageSnapshot(registry, 8);
					pageContext.latestSnapshot = snapshotText;
					const originalError = toContentString(result.content);
					const fullTrace = buildToolTrace(fullToolTrace, {
						round,
						name: tc.name,
						input: tc.input,
						result,
						marker: "[当前失败]"
					});
					result = {
						content: [
							originalError,
							"",
							`自动恢复 ${attempts}/${DEFAULT_ACTION_RECOVERY_ROUNDS}:等待 ${recoveryWaitMs}ms 后重新获取页面快照。`,
							"本次对话任务完整工具轨迹(含本次失败):",
							fullTrace,
							"请根据下方最新快照,重新定位本次操作目标元素并再次调用工具。",
							"",
							"最新页面快照:",
							snapshotText
						].join("\n"),
						details: {
							error: true,
							code: "ELEMENT_NOT_FOUND_RECOVERY",
							recoveryAttempt: attempts,
							recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS,
							waitMs: recoveryWaitMs
						}
					};
				} else {
					// Recovery budget for this exact call is used up; no further snapshot.
					const originalError = toContentString(result.content);
					const fullTrace = buildToolTrace(fullToolTrace, {
						round,
						name: tc.name,
						input: tc.input,
						result,
						marker: "[超过恢复上限]"
					});
					result = {
						content: [
							originalError,
							"",
							`已达到最大自动恢复次数(${DEFAULT_ACTION_RECOVERY_ROUNDS})。请根据当前页面状态调整操作目标后重试。`,
							"本次对话任务完整工具轨迹:",
							fullTrace
						].join("\n"),
						details: {
							error: true,
							code: "ELEMENT_NOT_FOUND_MAX_RECOVERY_REACHED",
							recoveryAttempt: attempts,
							recoveryMaxRounds: DEFAULT_ACTION_RECOVERY_ROUNDS,
							waitMs: recoveryWaitMs
						}
					};
				}
			}
			// A successful navigation invalidates previous snapshots.
			if (tc.name === "navigate") {
				const action = getToolAction(tc.input);
				if (action === "goto" || action === "back" || action === "forward" || action === "reload") {
					if (!hasToolError(result)) pageContext.needsSnapshotBeforeDom = true;
				}
			}
			recordOutcome(tc, result);
		}
		messages.push({
			role: "assistant",
			content: response.text ?? "",
			toolCalls: response.toolCalls
		});
		messages.push({
			role: "tool",
			content: toolResults
		});
	}
	if (finalReply) messages.push({
		role: "assistant",
		content: finalReply
	});
	return {
		reply: finalReply,
		toolCalls: allToolCalls,
		messages
	};
}
|
|
327
|
+
|
|
328
|
+
//#endregion
|
|
329
|
+
//#region src/core/ai-client/constants.ts
|
|
330
|
+
/**
 * Default API endpoint for each provider.
 *
 * - openai    → official OpenAI API
 * - copilot   → GitHub Models API (OpenAI-compatible format)
 * - anthropic → Anthropic Messages API
 */
const PROVIDER_ENDPOINTS = {
	openai: "https://api.openai.com/v1",
	copilot: "https://models.inference.ai.azure.com",
	anthropic: "https://api.anthropic.com"
};
/**
 * Check that a provider is supported.
 *
 * Only the table's own keys count: inherited object properties such as
 * `"constructor"` or `"toString"` are rejected like any other unknown name.
 *
 * @param provider - Provider identifier to validate.
 * @throws {Error} When the provider is not supported; the message lists the
 *   supported providers.
 */
function validateProvider(provider) {
	if (Object.hasOwn(PROVIDER_ENDPOINTS, provider)) return;
	const supported = Object.keys(PROVIDER_ENDPOINTS).join(", ");
	throw new Error(`Unknown AI provider: ${provider}. Supported: ${supported}`);
}
|
|
353
|
+
/**
 * Resolve the API base URL for a client configuration.
 *
 * A user-supplied `config.baseURL` (for example a local Ollama server) takes
 * precedence; otherwise the provider's default from `PROVIDER_ENDPOINTS` is
 * used. Only the table's own keys are consulted, so a provider name such as
 * `"toString"` resolves to `""` instead of an inherited function.
 *
 * @param config - Client config with `provider` and optional `baseURL`.
 * @returns The base URL, or `""` when the provider has no default endpoint.
 */
function resolveBaseURL(config) {
	const providerDefault = () => Object.hasOwn(PROVIDER_ENDPOINTS, config.provider) ? PROVIDER_ENDPOINTS[config.provider] : "";
	return config.baseURL ?? providerDefault();
}
|
|
362
|
+
/**
 * Clean a TypeBox schema by dropping properties that cannot be serialised.
 *
 * Objects produced by TypeBox's `Type.Object()` carry Symbol keys (such as
 * `[Kind]` and `[Hint]`). `JSON.stringify` ignores them, but some AI API
 * endpoints validate JSON Schema strictly and may reject them, so the schema
 * is passed through a JSON round trip (stringify → parse) to strip anything
 * that is not plain JSON.
 *
 * @param schema - Schema object to clean.
 * @returns A plain-JSON copy of the schema.
 */
function cleanSchema(schema) {
	const serialized = JSON.stringify(schema);
	return JSON.parse(serialized);
}
|
|
374
|
+
|
|
375
|
+
//#endregion
|
|
376
|
+
//#region src/core/ai-client/custom.ts
|
|
377
|
+
/**
 * Extensible AI client base class implementing the AIClient interface.
 *
 * Design:
 * - Implements `AIClient`, so instances can be passed to `executeAgentLoop()`
 *   and `WebAgent` directly.
 * - A `chatHandler` is injected at construction, so conversation logic can
 *   be customised without subclassing.
 * - `chat()` may be overridden by subclasses to add middleware-style logic.
 */
var BaseAIClient = class {
	/** Conversation handler supplied by the user. */
	chatHandler;
	/**
	 * @param options - Must provide `chatHandler`, the function that performs a chat request.
	 */
	constructor({ chatHandler }) {
		this.chatHandler = chatHandler;
	}
	/**
	 * Send a chat request and return the AI response.
	 *
	 * The default implementation delegates to `chatHandler`. Subclasses can
	 * override it to add logging, retries, caching, and so on.
	 *
	 * @param params - Chat parameters in the unified format.
	 * @returns The AI response in the unified format.
	 */
	async chat(params) {
		const response = await this.chatHandler(params);
		return response;
	}
};
|
|
404
|
+
|
|
405
|
+
//#endregion
|
|
406
|
+
//#region src/core/ai-client/openai.ts
|
|
407
|
+
/**
 * OpenAI / Copilot AI client, built on BaseAIClient.
 *
 * Wraps the whole OpenAI Chat Completions call:
 * 1. buildOpenAIRequest()  → build the HTTP request
 * 2. fetch()               → send it
 * 3. parseOpenAIResponse() → parse the response into the unified format
 *
 * Example:
 * ```ts
 * const client = new OpenAIClient({
 *   provider: "openai",
 *   model: "gpt-4o",
 *   apiKey: "sk-xxx",
 * });
 * const response = await client.chat({ systemPrompt, messages, tools });
 * ```
 *
 * It also works for Copilot (GitHub Models):
 * ```ts
 * const client = new OpenAIClient({
 *   provider: "copilot",
 *   model: "gpt-4o",
 *   apiKey: "ghp_xxx",
 * });
 * ```
 */
var OpenAIClient = class extends BaseAIClient {
	/** AI client configuration (provider / model / apiKey / baseURL). */
	config;
	constructor(config) {
		// The handler reads `this.config` lazily, i.e. only when a chat request
		// is made, which is after the assignment below has run.
		const sendChat = async (params) => {
			const { url, method, headers, body } = buildOpenAIRequest(this.config, params);
			const res = await fetch(url, {
				method,
				headers,
				body
			});
			if (res.ok) return parseOpenAIResponse(await res.json());
			// Non-2xx: surface the status and at most 500 characters of the body.
			const errText = await res.text();
			throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
		};
		super({ chatHandler: sendChat });
		this.config = config;
	}
};
|
|
454
|
+
/**
 * Convert unified ChatParams into an OpenAI Chat Completions API request.
 *
 * Conversion rules:
 * - system prompt → a `{ role: "system", content }` message
 * - tool definitions → `tools` array (function-calling format)
 * - tool results → split into separate `{ role: "tool", tool_call_id }` messages
 * - AI replies that contain tool calls → `tool_calls` field
 *
 * Fixed request parameters: temperature=0.3, max_tokens=8192, and
 * tool_choice="auto" whenever at least one tool is supplied.
 *
 * @param config - Client config (`model`, `apiKey`, `provider`, optional `baseURL`).
 * @param params - `{ systemPrompt, messages, tools }`.
 * @returns `{ url, method, headers, body }` with `body` already JSON-encoded.
 */
function buildOpenAIRequest(config, params) {
	const endpointBase = resolveBaseURL(config);
	const { systemPrompt, messages, tools } = params;
	const functionTools = tools?.map(({ name, description, schema }) => ({
		type: "function",
		function: {
			name,
			description,
			parameters: cleanSchema(schema)
		}
	}));
	const chatMessages = convertMessages$1(systemPrompt, messages);
	const hasTools = Boolean(functionTools) && functionTools.length > 0;
	const payload = {
		model: config.model,
		messages: chatMessages,
		temperature: 0.3,
		max_tokens: 8192,
		// Tool fields are sent only when there is at least one tool.
		...(hasTools ? {
			tools: functionTools,
			tool_choice: "auto"
		} : {})
	};
	const headers = {
		"Content-Type": "application/json",
		Authorization: `Bearer ${config.apiKey}`
	};
	return {
		url: `${endpointBase}/chat/completions`,
		method: "POST",
		headers,
		body: JSON.stringify(payload)
	};
}
|
|
497
|
+
/**
 * Parse a raw OpenAI Chat Completions response into the unified AIChatResponse.
 *
 * Key points:
 * - text reply → `choice.message.content`
 * - tool calls → `choice.message.tool_calls`; `arguments` is a JSON string
 *   that must be parsed
 * - token usage → `usage.prompt_tokens` / `usage.completion_tokens`
 *
 * Tool-call arguments are handled defensively: missing or blank arguments are
 * treated as `{}`, a value that is already an object is passed through, and
 * malformed JSON is reported with the tool's name.
 *
 * @param data - Decoded JSON body of the API response.
 * @returns `{ text, toolCalls, usage }`; each field is `undefined` when absent.
 * @throws {Error} When the response has no usable choice/message.
 * @throws {SyntaxError} When a tool call's arguments are not valid JSON.
 */
function parseOpenAIResponse(data) {
	// Kept as an inner function so it cannot clash with other module-level names.
	const parseArguments = (toolName, rawArguments) => {
		if (rawArguments === void 0 || rawArguments === null) return {};
		// NOTE(review): assumes an endpoint that returns already-decoded
		// arguments means them to be used as-is — confirm against target providers.
		if (typeof rawArguments !== "string") return rawArguments;
		if (rawArguments.trim() === "") return {};
		try {
			return JSON.parse(rawArguments);
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			throw new SyntaxError(`AI returned invalid JSON arguments for tool "${toolName}": ${reason}`, { cause: err });
		}
	};
	const msg = data?.choices?.[0]?.message;
	if (!msg) throw new Error("AI 未返回有效响应");
	const toolCalls = msg.tool_calls?.map((tc) => ({
		id: tc.id,
		name: tc.function.name,
		input: parseArguments(tc.function.name, tc.function.arguments)
	}));
	const usage = data.usage;
	return {
		text: msg.content || void 0,
		toolCalls: toolCalls?.length ? toolCalls : void 0,
		usage: usage ? {
			inputTokens: usage.prompt_tokens ?? 0,
			outputTokens: usage.completion_tokens ?? 0
		} : void 0
	};
}
|
|
526
|
+
/**
 * Convert unified messages into an OpenAI message array.
 *
 * The system prompt always becomes the first message. After that:
 * 1. tool messages (tool results) → one `role: "tool"` message per result
 * 2. assistant messages with toolCalls → carry a `tool_calls` field
 * 3. everything else → role + content mapped directly (non-string content
 *    is JSON-encoded)
 *
 * @param systemPrompt - Text for the leading system message.
 * @param messages - Messages in the unified format.
 * @returns Messages in OpenAI Chat Completions format.
 */
function convertMessages$1(systemPrompt, messages) {
	const toOpenAI = (m) => {
		if (m.role === "tool" && Array.isArray(m.content)) {
			return m.content.map((entry) => ({
				role: "tool",
				content: entry.result,
				tool_call_id: entry.toolCallId
			}));
		}
		if (m.role === "assistant" && m.toolCalls?.length) {
			const toolCalls = m.toolCalls.map((call) => ({
				id: call.id,
				type: "function",
				function: {
					name: call.name,
					arguments: JSON.stringify(call.input)
				}
			}));
			return [{
				role: "assistant",
				content: typeof m.content === "string" ? m.content : null,
				tool_calls: toolCalls
			}];
		}
		const content = typeof m.content === "string" ? m.content : JSON.stringify(m.content);
		return [{
			role: m.role,
			content
		}];
	};
	const converted = [{
		role: "system",
		content: systemPrompt
	}];
	for (const m of messages) {
		for (const out of toOpenAI(m)) converted.push(out);
	}
	return converted;
}
|
|
562
|
+
|
|
563
|
+
//#endregion
|
|
564
|
+
//#region src/core/ai-client/anthropic.ts
|
|
565
|
+
/**
 * Anthropic AI client, built on BaseAIClient.
 *
 * Wraps the whole Anthropic Messages API call:
 * 1. buildAnthropicRequest()  → build the HTTP request
 * 2. fetch()                  → send it
 * 3. parseAnthropicResponse() → parse the response into the unified format
 *
 * Example:
 * ```ts
 * const client = new AnthropicClient({
 *   provider: "anthropic",
 *   model: "claude-sonnet-4-20250514",
 *   apiKey: "sk-ant-xxx",
 * });
 * const response = await client.chat({ systemPrompt, messages, tools });
 * ```
 */
var AnthropicClient = class extends BaseAIClient {
	/** AI client configuration (provider / model / apiKey / baseURL). */
	config;
	constructor(config) {
		// The handler reads `this.config` lazily, i.e. only when a chat request
		// is made, which is after the assignment below has run.
		const sendChat = async (params) => {
			const { url, method, headers, body } = buildAnthropicRequest(this.config, params);
			const res = await fetch(url, {
				method,
				headers,
				body
			});
			if (res.ok) return parseAnthropicResponse(await res.json());
			// Non-2xx: surface the status and at most 500 characters of the body.
			const errText = await res.text();
			throw new Error(`AI API ${res.status}: ${errText.slice(0, 500)}`);
		};
		super({ chatHandler: sendChat });
		this.config = config;
	}
};
|
|
603
|
+
/**
 * Convert unified ChatParams into an Anthropic Messages API request.
 *
 * Format differences compared with OpenAI:
 * - system prompt → `body.system` field (not an element of the messages array)
 * - tool definitions → `input_schema` (instead of `parameters`)
 * - tool results → user role + `tool_result` content blocks
 * - AI tool calls → assistant role + `tool_use` content blocks
 *
 * max_tokens: 16384 when the model name contains "opus", otherwise 8192.
 * Authentication uses the `x-api-key` header (not `Authorization: Bearer`).
 *
 * @param config - Client config (`model`, `apiKey`, `provider`, optional `baseURL`).
 * @param params - `{ systemPrompt, messages, tools }`.
 * @returns `{ url, method, headers, body }` with `body` already JSON-encoded.
 */
function buildAnthropicRequest(config, params) {
	const endpointBase = resolveBaseURL(config);
	const { systemPrompt, messages, tools } = params;
	const toolSpecs = tools?.map(({ name, description, schema }) => ({
		name,
		description,
		input_schema: cleanSchema(schema)
	}));
	const chatMessages = convertMessages(messages);
	const outputBudget = config.model.includes("opus") ? 16384 : 8192;
	const hasTools = Boolean(toolSpecs) && toolSpecs.length > 0;
	const payload = {
		model: config.model,
		max_tokens: outputBudget,
		system: systemPrompt,
		messages: chatMessages,
		// The tools field is sent only when there is at least one tool.
		...(hasTools ? { tools: toolSpecs } : {})
	};
	const headers = {
		"Content-Type": "application/json",
		"x-api-key": config.apiKey,
		"anthropic-version": "2023-06-01"
	};
	return {
		url: `${endpointBase}/v1/messages`,
		method: "POST",
		headers,
		body: JSON.stringify(payload)
	};
}
|
|
642
|
+
/**
 * Parse a raw Anthropic Messages API response into the unified AIChatResponse.
 *
 * Anthropic returns an array of content blocks:
 * - type="text"     → text reply (possibly several; concatenated into one string)
 * - type="tool_use" → tool call (id + name + input)
 *
 * Token usage field names differ from OpenAI as well:
 * `input_tokens` / `output_tokens` (not `prompt_tokens`).
 *
 * @param data - Decoded JSON body of the API response.
 * @returns `{ text, toolCalls, usage }`; each field is `undefined` when absent.
 */
function parseAnthropicResponse(data) {
	const textParts = [];
	const calls = [];
	// Single pass over the blocks, sorting them by type.
	data.content?.forEach((block) => {
		if (block.type === "text") {
			textParts.push(block.text);
		} else if (block.type === "tool_use") {
			calls.push({
				id: block.id,
				name: block.name,
				input: block.input
			});
		}
	});
	const text = textParts.join("");
	const { usage } = data;
	return {
		text: text || void 0,
		toolCalls: calls.length > 0 ? calls : void 0,
		usage: usage ? {
			inputTokens: usage.input_tokens,
			outputTokens: usage.output_tokens
		} : void 0
	};
}
|
|
668
|
+
/**
 * Convert unified messages into an Anthropic message array.
 *
 * Differences handled here:
 * 1. system messages are dropped (Anthropic takes the prompt via `body.system`)
 * 2. tool-role messages → user role + `tool_result` content blocks
 * 3. assistant messages with toolCalls → text + `tool_use` content blocks
 *    (the text block is omitted when the content is empty or not a string)
 *
 * @param messages - Messages in the unified format.
 * @returns Messages in Anthropic Messages API format.
 */
function convertMessages(messages) {
	const converted = [];
	for (const m of messages) {
		if (m.role === "system") continue;
		if (m.role === "tool" && Array.isArray(m.content)) {
			const blocks = m.content.map((entry) => ({
				type: "tool_result",
				tool_use_id: entry.toolCallId,
				content: entry.result
			}));
			converted.push({
				role: "user",
				content: blocks
			});
			continue;
		}
		if (m.role === "assistant" && m.toolCalls?.length) {
			const leadingText = m.content && typeof m.content === "string" ? [{
				type: "text",
				text: m.content
			}] : [];
			const toolUses = m.toolCalls.map((call) => ({
				type: "tool_use",
				id: call.id,
				name: call.name,
				input: call.input
			}));
			converted.push({
				role: "assistant",
				content: [...leadingText, ...toolUses]
			});
			continue;
		}
		converted.push({
			role: m.role,
			content: typeof m.content === "string" ? m.content : JSON.stringify(m.content)
		});
	}
	return converted;
}
|
|
709
|
+
|
|
710
|
+
//#endregion
|
|
711
|
+
//#region src/core/ai-client/index.ts
|
|
712
|
+
/**
 * Create an AI client (high-level API).
 *
 * The concrete class is chosen from `config.provider`:
 * - openai / copilot → `new OpenAIClient(config)`
 * - anthropic        → `new AnthropicClient(config)`
 *
 * The returned object implements AIClient; call `chat()` to talk to the AI.
 *
 * @param config - Configuration with provider, model, apiKey, etc.
 * @returns An AIClient instance (OpenAIClient or AnthropicClient).
 * @throws {Error} When the provider is not supported.
 */
function createAIClient(config) {
	validateProvider(config.provider);
	const provider = config.provider;
	if (provider === "openai" || provider === "copilot") return new OpenAIClient(config);
	if (provider === "anthropic") return new AnthropicClient(config);
	// Reached only for a provider that passed validation but has no client class.
	throw new Error(`Unknown AI provider: ${config.provider}. Supported: openai, copilot, anthropic`);
}
|
|
733
|
+
|
|
734
|
+
//#endregion
|
|
735
|
+
//#region src/core/tool-registry.ts
|
|
736
|
+
/**
 * Tool registry instance: registers, lists and dispatches a set of tools.
 *
 * Each agent owns its own ToolRegistry, so that:
 * - a Node agent's exec/file tools never leak into a Web agent
 * - a Web agent's dom/navigate tools never leak into a Node agent
 * - different test cases do not affect one another
 */
var ToolRegistry = class {
	/** Registered tools keyed by tool name. */
	tools = /* @__PURE__ */ new Map();
	/** Register a tool; a later registration under the same name replaces the earlier one. */
	register(tool) {
		this.tools.set(tool.name, tool);
	}
	/** List every registered tool definition (sent to the AI to announce available tools). */
	getDefinitions() {
		return [...this.tools.values()];
	}
	/**
	 * Dispatch a tool call by name and execute it.
	 * - tool found → run `execute()` and return its result
	 * - tool missing → return an error result (no exception, so the AI learns
	 *   the tool does not exist)
	 * - execution fails → catch the exception and return an error result (the
	 *   agent loop is not interrupted)
	 *
	 * @param name - Tool name.
	 * @param input - Tool input; `null`/`undefined` becomes `{}`.
	 * @returns The tool's result, or `{ content, details: { error: true, ... } }`.
	 */
	async dispatch(name, input) {
		const tool = this.tools.get(name);
		if (!tool) {
			return {
				content: `Unknown tool: ${name}`,
				details: {
					error: true,
					toolName: name
				}
			};
		}
		try {
			const outcome = await tool.execute(input ?? {});
			return outcome;
		} catch (err) {
			const message = err instanceof Error ? err.message : String(err);
			const details = {
				error: true,
				toolName: name,
				message
			};
			return {
				content: `Tool "${name}" failed: ${message}`,
				details
			};
		}
	}
};
|
|
785
|
+
|
|
786
|
+
//#endregion
|
|
787
|
+
//#region src/core/system-prompt.ts
|
|
788
|
+
/**
 * Build the system prompt.
 *
 * It has two parts: the identity/strategy description and, when tools are
 * supplied, a list of the available tools.
 *
 * @param params - Optional `{ tools }`; each tool contributes its `name` and `description`.
 * @returns The prompt text, with the two sections separated by a blank line.
 */
function buildSystemPrompt(params = {}) {
	const identity = "You are AutoPilot, an AI agent embedded in the user's web page.\nYou can interact with the page by clicking elements, filling forms, reading content, and executing JavaScript.\nAlways confirm destructive actions with the user before executing.\n\n## 操作策略\n\n每次用户请求操作页面时,系统会自动附上当前页面的 DOM 快照。\n快照中每个元素都带有 ref 属性(基于层级位置的唯一路径,如 /body/main/form/button)。\n请严格遵循以下流程:\n1. 分析快照,理解页面结构和元素层级关系。\n2. 从快照中找到目标元素,复制其 ref 路径。\n3. 将 ref 路径作为 dom 工具的 selector 参数传入。\n4. **禁止**猜测 CSS 选择器(如 'button'、'#id'、'.class'),必须使用快照中的 ref 路径。\n5. 规划操作步骤后,按顺序逐步执行。\n\n## 元素选择原则(语义优先)\n\n页面中可能存在多个文本相似的元素(如多个「发送」按钮、多个输入框)。\n**严禁仅凭元素文本匹配来选择操作对象**,必须结合以下语义上下文综合判断:\n1. **层级归属**:元素属于哪个区域/表单/卡片?从 ref 路径的父级结构判断(如 /body/main/form 下的按钮属于该表单)。\n2. **功能关联**:元素与用户意图的功能是否匹配?一个「发送」按钮在聊天区域,另一个在表单区域,要根据用户想操作的功能区来选择。\n3. **周围元素**:查看目标元素的兄弟节点和父级容器,理解它所在的功能模块。\n4. **属性辅助**:利用 id、class、placeholder、aria-label、name 等属性辅助确认元素的用途。\n5. **操作上下文**:如果用户在一系列操作中(如先填写表单再点提交),选择与前序操作同区域的元素。\n\n示例:用户说「点击发送按钮」,页面有两个按钮都叫「发送」:\n- /body/div[1]/div/chat-area/button → 聊天发送按钮\n- /body/div[1]/div/form/button → 表单提交按钮\n你必须根据用户意图和对话上下文判断应该点击哪个,而不是随意选择。";
	const availableTools = params.tools ?? [];
	// Without tools the prompt is just the identity section.
	if (!(availableTools.length > 0)) return identity;
	const bullets = availableTools.map((tool) => `- **${tool.name}**: ${tool.description}`).join("\n");
	const toolSection = `## Available Tools\n\n${bullets}\n\nUse tools when needed to complete the user's request.`;
	return [identity, toolSection].join("\n\n");
}
|
|
802
|
+
|
|
803
|
+
//#endregion
|
|
804
|
+
//#region src/web/page-info-tool.ts
|
|
805
|
+
/**
|
|
806
|
+
* Page Info Tool — 基于 Web API 的页面信息获取工具。
|
|
807
|
+
*
|
|
808
|
+
* 替代 Playwright 的 getTitle/getUrl/snapshot 等。
|
|
809
|
+
* 运行环境:浏览器 Content Script。
|
|
810
|
+
*
|
|
811
|
+
* 支持 6 种动作:
|
|
812
|
+
* get_url — 获取当前页面 URL
|
|
813
|
+
* get_title — 获取页面标题
|
|
814
|
+
* get_selection — 获取用户选中的文本
|
|
815
|
+
* get_viewport — 获取视口尺寸和滚动位置
|
|
816
|
+
* snapshot — 获取页面 DOM 结构快照(AI 可读的文本描述)
|
|
817
|
+
* query_all — 查询所有匹配选择器的元素,返回摘要信息
|
|
818
|
+
*/
|
|
819
|
+
/**
 * Render a DOM subtree as an indented, model-readable text outline.
 *
 * Every emitted line has the shape
 *   `[tag] "direct text" attr="…" … ref="/path"`
 * where `ref` is a positional path (lower-case tag names plus a 1-based `[n]`
 * among same-tag siblings) that `resolveRef` can turn back into the element.
 * The path is always rooted at the document element, also when `root` is not
 * `document.body`.
 *
 * Elements are skipped when their tag is in SKIP_TAGS (compared
 * case-insensitively, because `tagName` keeps its original case for SVG/MathML
 * elements), when they are `display: none` / `visibility: hidden`, or — with
 * `viewportOnly` — when they do not intersect the viewport. With `pruneLayout`,
 * anonymous layout wrappers are omitted while their children are kept at the
 * wrapper's depth.
 *
 * Values of password inputs are never written into the snapshot; a filled
 * password field is reported as `current-value="[redacted]"`.
 *
 * @param {Element} [root=document.body] Element to start from.
 * @param {number|{maxDepth?: number, viewportOnly?: boolean, pruneLayout?: boolean}} [options]
 *   Options object, or a bare number treated as `maxDepth` (backward compatible).
 * @returns {string} The outline, or "(空页面)" when nothing was emitted.
 */
function generateSnapshot(root = document.body, options = {}) {
	const opts = typeof options === "number" ? { maxDepth: options } : options ?? {};
	const maxDepth = opts.maxDepth ?? 6;
	const viewportOnly = opts.viewportOnly ?? true;
	const pruneLayout = opts.pruneLayout ?? true;
	/** Tags that never carry information useful to the model. */
	const SKIP_TAGS = new Set([
		"SCRIPT",
		"STYLE",
		"SVG",
		"NOSCRIPT",
		"LINK",
		"META",
		"BR",
		"HR"
	]);
	/** Pure layout containers that may be collapsed when pruning. */
	const LAYOUT_TAGS = new Set([
		"DIV",
		"SPAN",
		"SECTION",
		"ARTICLE",
		"ASIDE",
		"MAIN",
		"HEADER",
		"FOOTER",
		"NAV",
		"FIGURE",
		"FIGCAPTION"
	]);
	/** Viewport size, only read when viewportOnly is enabled. */
	const vpWidth = viewportOnly ? window.innerWidth : 0;
	const vpHeight = viewportOnly ? window.innerHeight : 0;
	/** Attributes copied into the outline as `name="value"` when non-empty. */
	const INTERACTIVE_ATTRS = [
		"href",
		"type",
		"placeholder",
		"value",
		"name",
		"role",
		"aria-label",
		"src",
		"alt",
		"title",
		"for",
		"action",
		"method",
		"target",
		"min",
		"max",
		"pattern",
		"maxlength",
		"tabindex"
	];
	/** Boolean attributes, printed by name only when present. */
	const BOOLEAN_ATTRS = [
		"disabled",
		"checked",
		"readonly",
		"required",
		"selected",
		"hidden",
		"multiple",
		"autofocus",
		"open"
	];
	/** Prefix of inline event-handler attributes (onclick, onchange, …). */
	const EVENT_PREFIX = "on";
	/** Placeholder printed instead of a password field's value. */
	const REDACTED = "[redacted]";
	/** Upper-cased tag name, so set lookups also work for SVG/MathML elements. */
	const tagKey = (el) => el.tagName.toUpperCase();
	/**
	 * 1-based position of `el` among its same-tag siblings, formatted as `[n]`.
	 * Returns "" when the element has no parent or is the only sibling with that tag.
	 */
	function getSiblingIndex(el) {
		const parent = el.parentElement;
		if (!parent) return "";
		const tag = el.tagName;
		const siblings = Array.from(parent.children).filter((c) => c.tagName === tag);
		if (siblings.length <= 1) return "";
		return `[${siblings.indexOf(el) + 1}]`;
	}
	/**
	 * Path of the ancestors of `el` below the document element, e.g. "/body/main".
	 * Used as the starting path so refs stay resolvable for a non-body root.
	 */
	function ancestorPath(el) {
		const segments = [];
		for (let node = el.parentElement; node && node !== document.documentElement; node = node.parentElement) segments.unshift(`${node.tagName.toLowerCase()}${getSiblingIndex(node)}`);
		return segments.length > 0 ? `/${segments.join("/")}` : "";
	}
	/**
	 * Whether the element intersects the viewport (partial overlap counts).
	 * Elements at depth <= 1 always pass so the top-level structure is kept.
	 */
	function isInViewport(el, depth) {
		if (!viewportOnly) return true;
		if (depth <= 1) return true;
		const rect = el.getBoundingClientRect();
		if (rect.bottom < 0 || rect.top > vpHeight) return false;
		if (rect.right < 0 || rect.left > vpWidth) return false;
		if (rect.width === 0 && rect.height === 0) return false;
		return true;
	}
	/**
	 * Whether the element is an anonymous layout wrapper: a layout tag with no
	 * id, no role/aria-label, no inline event handler and no direct text.
	 */
	function isEmptyLayoutContainer(el, directText) {
		if (!pruneLayout) return false;
		if (!LAYOUT_TAGS.has(tagKey(el))) return false;
		if (el.getAttribute("id")) return false;
		if (el.getAttribute("role") || el.getAttribute("aria-label")) return false;
		for (const attr of Array.from(el.attributes)) if (attr.name.startsWith(EVENT_PREFIX)) return false;
		if (directText) return false;
		return true;
	}
	function walk(el, depth, parentPath) {
		if (depth > maxDepth) return "";
		if (SKIP_TAGS.has(tagKey(el))) return "";
		const style = window.getComputedStyle(el);
		if (style.display === "none" || style.visibility === "hidden") return "";
		if (!isInViewport(el, depth)) return "";
		const indent = " ".repeat(depth);
		const tag = el.tagName.toLowerCase();
		const currentPath = `${parentPath}/${tag}${getSiblingIndex(el)}`;
		const isPassword = el instanceof HTMLInputElement && (el.getAttribute("type") ?? "").toLowerCase() === "password";
		const attrs = [];
		const elId = el.getAttribute("id");
		if (elId) attrs.push(`id="${elId}"`);
		const className = el.getAttribute("class")?.trim();
		if (className) {
			// Only the first three classes are kept to bound the line length.
			const classes = className.split(/\s+/).filter(Boolean).slice(0, 3).join(" ");
			if (classes) attrs.push(`class="${classes}"`);
		}
		for (const attr of INTERACTIVE_ATTRS) {
			// Never copy a password out of the page.
			if (isPassword && attr === "value") continue;
			const val = el.getAttribute(attr);
			if (val) attrs.push(`${attr}="${val}"`);
		}
		for (const attr of BOOLEAN_ATTRS) if (el.hasAttribute(attr)) attrs.push(attr);
		const events = [];
		const dataAttrs = [];
		for (const attrObj of Array.from(el.attributes)) {
			if (attrObj.name.startsWith(EVENT_PREFIX)) events.push(attrObj.name);
			// At most three data-* attributes, each value cut to 30 characters.
			if (attrObj.name.startsWith("data-") && dataAttrs.length < 3) dataAttrs.push(`${attrObj.name}="${attrObj.value.slice(0, 30)}"`);
		}
		if (events.length > 0) attrs.push(`events=[${events.join(",")}]`);
		if (dataAttrs.length > 0) attrs.push(...dataAttrs);
		if ((el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) && el.value) {
			if (isPassword) attrs.push(`current-value="${REDACTED}"`);
			// Compare the full live value with the attribute before truncating for display.
			else if (el.getAttribute("value") !== el.value) attrs.push(`current-value="${el.value.slice(0, 60)}"`);
		}
		let directText = "";
		for (let i = 0; i < el.childNodes.length; i++) {
			const node = el.childNodes[i];
			if (node.nodeType === Node.TEXT_NODE) {
				const t = node.textContent?.trim();
				if (t) directText += t + " ";
			}
		}
		directText = directText.trim();
		if (isEmptyLayoutContainer(el, directText)) {
			// Pruned wrapper: emit only the children, at this wrapper's depth,
			// but keep the wrapper in their ref path so refs stay resolvable.
			const childLines = [];
			for (let i = 0; i < el.children.length; i++) {
				const childResult = walk(el.children[i], depth, currentPath);
				if (childResult) childLines.push(childResult);
			}
			return childLines.join("\n");
		}
		let line = `${indent}[${tag}]`;
		if (directText) line += ` "${directText.slice(0, 80)}"`;
		if (attrs.length) line += ` ${attrs.join(" ")}`;
		line += ` ref="${currentPath}"`;
		const lines = [line];
		for (let i = 0; i < el.children.length; i++) {
			const childResult = walk(el.children[i], depth + 1, currentPath);
			if (childResult) lines.push(childResult);
		}
		return lines.join("\n");
	}
	return walk(root, 0, ancestorPath(root)) || "(空页面)";
}
|
|
1018
|
+
/**
 * Run a CSS selector against the document and summarise the matches.
 *
 * Each listed match is rendered as `n. <tag#id.class1.class2> "text"` with the
 * text cut to 60 characters. Class names are split on any whitespace. When
 * more elements match than `limit`, a trailing line reports how many were
 * left out.
 *
 * @param {string} selector CSS selector passed to `document.querySelectorAll`.
 * @param {number} [limit=20] Maximum number of matches to list. Negative values
 *   are treated as 0; values that are not numbers fall back to 20.
 * @returns {string} The summary, a "not found" message, or a selector-error
 *   message when anything throws (typically an invalid selector).
 */
function queryAllElements(selector, limit = 20) {
	try {
		const elements = document.querySelectorAll(selector);
		if (elements.length === 0) return `未找到匹配 "${selector}" 的元素`;
		// Sanitise the cap so the "remaining" count below can never exceed the total.
		const requested = Math.floor(Number(limit));
		const cap = Number.isNaN(requested) ? 20 : Math.max(0, requested);
		const results = [`找到 ${elements.length} 个元素:`];
		const count = Math.min(elements.length, cap);
		for (let i = 0; i < count; i++) {
			const el = elements[i];
			const tag = el.tagName.toLowerCase();
			const text = el.textContent?.trim().slice(0, 60) ?? "";
			const id = el.id ? `#${el.id}` : "";
			// className is not a string on SVG elements; those get no class suffix.
			const classNames = typeof el.className === "string" ? el.className.split(/\s+/).filter(Boolean) : [];
			const cls = classNames.length > 0 ? `.${classNames.join(".")}` : "";
			results.push(` ${i + 1}. <${tag}${id}${cls}> "${text}"`);
		}
		if (elements.length > cap) results.push(` ...还有 ${elements.length - cap} 个元素`);
		return results.join("\n");
	} catch (e) {
		return `选择器语法错误: ${selector}`;
	}
}
|
|
1041
|
+
/**
 * Create the `page_info` tool, which reports facts about the current page.
 *
 * Supported actions: get_url, get_title, get_selection, get_viewport,
 * snapshot and query_all. `execute` always resolves to `{ content }`; when a
 * handler throws, the result additionally carries `details: { error, action }`.
 *
 * @returns {{ name: string, description: string, schema: object, execute: (params: object) => Promise<object> }}
 */
function createPageInfoTool() {
	// One content producer per action; each returns the string placed in `content`.
	const producers = new Map([
		["get_url", () => window.location.href],
		["get_title", () => document.title || "(无标题)"],
		["get_selection", () => (window.getSelection()?.toString().trim() ?? "") || "(未选中任何文本)"],
		["get_viewport", () => {
			const metrics = {
				viewportWidth: window.innerWidth,
				viewportHeight: window.innerHeight,
				scrollX: window.scrollX,
				scrollY: window.scrollY,
				pageWidth: document.documentElement.scrollWidth,
				pageHeight: document.documentElement.scrollHeight
			};
			return JSON.stringify(metrics, null, 2);
		}],
		["snapshot", (params) => generateSnapshot(document.body, {
			maxDepth: params.maxDepth ?? 6,
			viewportOnly: params.viewportOnly ?? true,
			pruneLayout: params.pruneLayout ?? true
		})],
		["query_all", (params) => {
			const selector = params.selector;
			return selector ? queryAllElements(selector) : "缺少 selector 参数";
		}]
	]);
	const description = [
		"Get information about the current page.",
		"Actions: get_url, get_title, get_selection (selected text),",
		"get_viewport (size & scroll), snapshot (DOM structure), query_all (find all matching elements)."
	].join(" ");
	const schema = Type.Object({
		action: Type.String({ description: "Info action: get_url | get_title | get_selection | get_viewport | snapshot | query_all" }),
		selector: Type.Optional(Type.String({ description: "CSS selector for query_all action" })),
		maxDepth: Type.Optional(Type.Number({ description: "Max depth for snapshot (default: 6)" })),
		viewportOnly: Type.Optional(Type.Boolean({ description: "Only snapshot elements visible in viewport (default: true)" })),
		pruneLayout: Type.Optional(Type.Boolean({ description: "Collapse empty layout containers like div/span (default: true)" }))
	});
	async function execute(params) {
		const action = params.action;
		try {
			const produce = producers.get(action);
			const content = produce ? produce(params) : `未知的页面信息动作: ${action}`;
			return { content };
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			return {
				content: `页面信息操作 "${action}" 失败: ${reason}`,
				details: {
					error: true,
					action
				}
			};
		}
	}
	return {
		name: "page_info",
		description,
		schema,
		execute: (params) => execute(params)
	};
}
|
|
1103
|
+
|
|
1104
|
+
//#endregion
|
|
1105
|
+
//#region src/web/dom-tool.ts
|
|
1106
|
+
/**
|
|
1107
|
+
* DOM Tool — 基于 Web API 的 DOM 操作工具。
|
|
1108
|
+
*
|
|
1109
|
+
* 替代 Playwright 的 click/fill/type 等操作,直接在页面上下文中执行。
|
|
1110
|
+
* 运行环境:浏览器 Content Script。
|
|
1111
|
+
*
|
|
1112
|
+
* 支持 8 种动作:
|
|
1113
|
+
* click — 点击元素
|
|
1114
|
+
* fill — 填写输入框(清空后设值)
|
|
1115
|
+
* type — 逐字符模拟键入
|
|
1116
|
+
* get_text — 获取元素文本内容
|
|
1117
|
+
* get_attr — 获取元素属性值
|
|
1118
|
+
* set_attr — 设置元素属性
|
|
1119
|
+
* add_class — 添加 CSS 类名
|
|
1120
|
+
* remove_class — 移除 CSS 类名
|
|
1121
|
+
*/
|
|
1122
|
+
/** Fallback wait (ms) returned by resolveWaitMs when the caller supplies no usable wait value. */
const DEFAULT_WAIT_MS = 1000;
/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
	return new Promise((done) => {
		setTimeout(done, ms);
	});
}
|
|
1126
|
+
/**
 * Resolve a snapshot ref (positional path) to a DOM element.
 *
 * Ref format: `/body/div[1]/main/form/input[2]`. Each segment is a tag name,
 * optionally followed by `[n]` — the 1-based position among same-tag siblings.
 * Resolution starts at `document.documentElement`; a leading segment naming
 * the document element itself (e.g. `/html/...`) is accepted and skipped.
 *
 * Tag names are compared case-insensitively: `tagName` is upper-case for HTML
 * elements but keeps its original case for SVG/MathML elements, so an exact
 * comparison against the upper-cased segment would never match those.
 *
 * When exactly one child has the requested tag, it is returned regardless of
 * the index in the segment.
 *
 * @param {string} ref Path as emitted in the snapshot's `ref="…"` field.
 * @returns {Element|null} The element, or null when a segment is malformed,
 *   has no matching child, or its index is out of range.
 */
function resolveRef(ref) {
	const segments = ref.split("/").filter(Boolean);
	let current = document.documentElement;
	for (let i = 0; i < segments.length; i++) {
		if (!current) return null;
		const match = segments[i].match(/^([a-z0-9-]+)(?:\[(\d+)\])?$/i);
		if (!match) return null;
		const tag = match[1].toUpperCase();
		const index = match[2] ? Number.parseInt(match[2], 10) : 1;
		// A leading segment that names the document element consumes no level.
		if (i === 0 && current.tagName.toUpperCase() === tag) continue;
		const sameTag = Array.from(current.children).filter((c) => c.tagName.toUpperCase() === tag);
		if (sameTag.length === 0) return null;
		if (sameTag.length === 1) {
			current = sameTag[0];
			continue;
		}
		if (index < 1 || index > sameTag.length) return null;
		current = sameTag[index - 1];
	}
	return current;
}
|
|
1154
|
+
/**
 * Look up a single element without throwing.
 *
 * Two addressing modes are supported:
 * - a snapshot ref path (starts with "/"), resolved through `resolveRef`;
 * - anything else is treated as a CSS selector for `document.querySelector`.
 *
 * @param {string} selector Ref path or CSS selector.
 * @returns {Element|string} The element, or a message string: a "not found"
 *   message (starting with "未找到") or, when the lookup throws, a selector
 *   syntax error message (starting with "选择器语法错误").
 */
function queryElement(selector) {
	try {
		const isRefPath = selector.startsWith("/");
		const hit = isRefPath ? resolveRef(selector) : document.querySelector(selector);
		if (hit) return hit;
		return isRefPath ? `未找到 ref "${selector}" 对应的元素` : `未找到匹配 "${selector}" 的元素`;
	} catch (e) {
		return `选择器语法错误: ${selector}`;
	}
}
|
|
1175
|
+
/**
 * Poll for an element until it is found or the timeout elapses.
 *
 * The lookup is retried every 100 ms through `queryElement`.
 *
 * @param {string} selector Ref path or CSS selector.
 * @param {number} timeoutMs Polling budget in milliseconds.
 * @returns {Promise<Element|string|null>} The element once found; the error
 *   string when the selector is syntactically invalid; null on timeout.
 */
async function waitForElement(selector, timeoutMs) {
	const startedAt = Date.now();
	const elapsed = () => Date.now() - startedAt;
	for (; elapsed() <= timeoutMs; await sleep(100)) {
		const outcome = queryElement(selector);
		// A non-string is the element itself; a syntax error will never heal, so stop polling.
		if (typeof outcome !== "string" || outcome.startsWith("选择器语法错误")) return outcome;
	}
	return null;
}
|
|
1191
|
+
/**
 * Work out how long (in ms) to wait for the target element.
 *
 * Precedence: a finite numeric `waitMs`, then a finite numeric `waitSeconds`
 * (converted to ms), then DEFAULT_WAIT_MS. The result is floored and never
 * negative.
 *
 * @param {{ waitMs?: number, waitSeconds?: number }} params Tool parameters.
 * @returns {number} Non-negative integer wait in milliseconds.
 */
function resolveWaitMs(params) {
	const isUsable = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);
	const toWholeMs = (ms) => Math.max(0, Math.floor(ms));
	const { waitMs } = params;
	if (isUsable(waitMs)) return toWholeMs(waitMs);
	const { waitSeconds } = params;
	return isUsable(waitSeconds) ? toWholeMs(waitSeconds * 1e3) : DEFAULT_WAIT_MS;
}
|
|
1198
|
+
/**
 * Announce a programmatic value change the way user input would.
 *
 * Dispatches a bubbling, cancelable `input` event followed by a bubbling,
 * cancelable `change` event on the element, so listeners attached on the
 * element or on an ancestor run.
 *
 * @param {EventTarget} el Element whose value was just changed.
 */
function dispatchInputEvents(el) {
	for (const eventName of ["input", "change"]) {
		const event = new Event(eventName, { bubbles: true, cancelable: true });
		el.dispatchEvent(event);
	}
}
|
|
1211
|
+
/**
 * Build a short human-readable description of an element, used in tool
 * results to show which DOM node was actually hit.
 *
 * Format: `<tag#id.class1.class2> "text" [attr=val, ...]` — at most three
 * classes, the text cut to 40 characters, and the attributes limited to
 * type, name, placeholder, href and role (only those with a value).
 *
 * @param {Element} el Element to describe.
 * @returns {string} The description.
 */
function describeElement(el) {
	let selectorish = el.tagName.toLowerCase();
	if (el.id) selectorish += `#${el.id}`;
	if (el.className && typeof el.className === "string") {
		const firstClasses = el.className.trim().split(/\s+/).filter(Boolean).slice(0, 3);
		selectorish += firstClasses.map((c) => `.${c}`).join("");
	}
	let description = `<${selectorish}>`;
	const text = el.textContent?.trim().slice(0, 40) ?? "";
	if (text) description += ` "${text}"`;
	const hints = [
		"type",
		"name",
		"placeholder",
		"href",
		"role"
	].flatMap((attr) => {
		const val = el.getAttribute(attr);
		return val ? [`${attr}=${val}`] : [];
	});
	if (hints.length > 0) description += ` [${hints.join(", ")}]`;
	return description;
}
|
|
1234
|
+
/**
 * Assign `value` to an input/textarea through the prototype's native `value`
 * setter.
 *
 * Some frameworks (notably React) shadow `value` on the element instance to
 * track the last value they rendered; a plain `el.value = x` goes through that
 * tracker, after which the framework treats the following `input` event as
 * "nothing changed" and drops it. Calling the prototype setter bypasses any
 * instance-level property so the subsequent event is seen as a real change.
 * Falls back to plain assignment when no native setter is available.
 */
function setNativeValue(el, value) {
	const proto = el instanceof HTMLInputElement ? HTMLInputElement.prototype : HTMLTextAreaElement.prototype;
	const nativeSetter = Object.getOwnPropertyDescriptor(proto, "value")?.set;
	if (nativeSetter) nativeSetter.call(el, value);
	else el.value = value;
}
/**
 * Create the `dom` tool, which performs an action on a single element.
 *
 * Supported actions: click, fill, type, get_text, get_attr, set_attr,
 * add_class, remove_class. The target is addressed by a snapshot ref path or
 * a CSS selector. Unless the resolved wait is 0, the tool polls for the
 * element for up to that many milliseconds before acting.
 *
 * `execute` resolves to `{ content }`. Lookup failures add
 * `details: { error, code, action, selector, … }` with code
 * "ELEMENT_NOT_FOUND" or "INVALID_SELECTOR"; an exception thrown by the
 * action adds `details: { error, action, selector }`.
 *
 * @returns {{ name: string, description: string, schema: object, execute: (params: object) => Promise<object> }}
 */
function createDomTool() {
	return {
		name: "dom",
		description: [
			"Perform DOM operations on the current page.",
			"Actions: click, fill, type, get_text, get_attr, set_attr, add_class, remove_class.",
			"Use the ref path from the DOM snapshot (e.g. /body/main/button) as selector to precisely target elements.",
			"CSS selectors are also supported but ref paths are preferred for accuracy."
		].join(" "),
		schema: Type.Object({
			action: Type.String({ description: "DOM action: click | fill | type | get_text | get_attr | set_attr | add_class | remove_class" }),
			selector: Type.String({ description: "Element ref path from snapshot (e.g. /body/main/button[2]) or CSS selector" }),
			value: Type.Optional(Type.String({ description: "Value for fill/type/set_attr actions" })),
			attribute: Type.Optional(Type.String({ description: "Attribute name for get_attr/set_attr actions" })),
			className: Type.Optional(Type.String({ description: "CSS class name for add_class/remove_class" })),
			waitMs: Type.Optional(Type.Number({ description: "Optional wait timeout in ms before action (default: 1000). Use 0 to disable waiting." })),
			waitSeconds: Type.Optional(Type.Number({ description: "Optional wait timeout in seconds before action. Used when waitMs is not provided." }))
		}),
		execute: async (params) => {
			const action = params.action;
			const selector = params.selector;
			const waitMs = resolveWaitMs(params);
			if (!selector) return { content: "缺少 selector 参数" };
			let el;
			if (waitMs > 0) {
				// Poll: string = invalid selector, null = timed out, otherwise the element.
				const found = await waitForElement(selector, waitMs);
				if (typeof found === "string") return {
					content: found,
					details: {
						error: true,
						code: "INVALID_SELECTOR",
						action,
						selector
					}
				};
				if (!found) return {
					content: `未找到匹配 "${selector}" 的元素`,
					details: {
						error: true,
						code: "ELEMENT_NOT_FOUND",
						action,
						selector,
						waitMs
					}
				};
				el = found;
			} else {
				// Waiting disabled: a single lookup; any string result is an error message.
				const elOrError = queryElement(selector);
				if (typeof elOrError === "string") return {
					content: elOrError,
					details: {
						error: true,
						code: elOrError.startsWith("未找到") ? "ELEMENT_NOT_FOUND" : "INVALID_SELECTOR",
						action,
						selector,
						waitMs
					}
				};
				el = elOrError;
			}
			try {
				switch (action) {
					case "click":
						if (el instanceof HTMLElement) {
							el.focus();
							el.click();
						} else el.dispatchEvent(new MouseEvent("click", { bubbles: true }));
						return { content: `已点击 ${describeElement(el)}` };
					case "fill": {
						const value = params.value;
						if (value === void 0) return { content: "缺少 value 参数" };
						if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
							el.focus();
							setNativeValue(el, value);
							dispatchInputEvents(el);
						} else if (el instanceof HTMLElement && el.isContentEditable) {
							el.focus();
							el.textContent = value;
							el.dispatchEvent(new Event("input", { bubbles: true }));
						} else return { content: `"${selector}" 不是可编辑元素` };
						return { content: `已填写 ${describeElement(el)}: "${value}"` };
					}
					case "type": {
						const value = params.value;
						if (value === void 0) return { content: "缺少 value 参数" };
						if (el instanceof HTMLElement) el.focus();
						// One keydown/keypress/input/keyup round per character.
						for (const char of value) {
							el.dispatchEvent(new KeyboardEvent("keydown", {
								key: char,
								bubbles: true
							}));
							el.dispatchEvent(new KeyboardEvent("keypress", {
								key: char,
								bubbles: true
							}));
							if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) setNativeValue(el, el.value + char);
							el.dispatchEvent(new Event("input", { bubbles: true }));
							el.dispatchEvent(new KeyboardEvent("keyup", {
								key: char,
								bubbles: true
							}));
						}
						return { content: `已逐字输入到 ${describeElement(el)}: "${value}"` };
					}
					case "get_text": {
						const text = el.textContent?.trim() ?? "";
						return { content: `${describeElement(el)} 的文本内容:${text || "(空)"}` };
					}
					case "get_attr": {
						const attribute = params.attribute;
						if (!attribute) return { content: "缺少 attribute 参数" };
						const attrValue = el.getAttribute(attribute);
						return { content: `${describeElement(el)} 的 ${attribute} = ${attrValue ?? "(不存在)"}` };
					}
					case "set_attr": {
						const attribute = params.attribute;
						const value = params.value;
						if (!attribute || value === void 0) return { content: "缺少 attribute 或 value 参数" };
						el.setAttribute(attribute, value);
						return { content: `已设置 ${describeElement(el)} 的 ${attribute}="${value}"` };
					}
					case "add_class": {
						const className = params.className;
						if (!className) return { content: "缺少 className 参数" };
						el.classList.add(className);
						return { content: `已添加 class "${className}" 到 ${describeElement(el)}` };
					}
					case "remove_class": {
						const className = params.className;
						if (!className) return { content: "缺少 className 参数" };
						el.classList.remove(className);
						return { content: `已移除 ${describeElement(el)} 的 class "${className}"` };
					}
					default: return { content: `未知的 DOM 动作: ${action}` };
				}
			} catch (err) {
				return {
					content: `DOM 操作 "${action}" 失败: ${err instanceof Error ? err.message : String(err)}`,
					details: {
						error: true,
						action,
						selector
					}
				};
			}
		}
	};
}
|
|
1382
|
+
|
|
1383
|
+
//#endregion
|
|
1384
|
+
//#region src/web/navigate-tool.ts
|
|
1385
|
+
/**
|
|
1386
|
+
* Navigate Tool — 基于 Web API 的页面导航工具。
|
|
1387
|
+
*
|
|
1388
|
+
* 替代 Playwright 的 goto/goBack/goForward/reload。
|
|
1389
|
+
* 运行环境:浏览器 Content Script。
|
|
1390
|
+
*
|
|
1391
|
+
* 支持 5 种动作:
|
|
1392
|
+
* goto — 跳转到指定 URL
|
|
1393
|
+
* back — 浏览器后退
|
|
1394
|
+
* forward — 浏览器前进
|
|
1395
|
+
* reload — 刷新当前页面
|
|
1396
|
+
* scroll — 滚动页面到指定位置或元素
|
|
1397
|
+
*/
|
|
1398
|
+
/**
 * Create the `navigate` tool: page navigation and scrolling.
 *
 * Supported actions: goto (assigns `window.location.href`), back, forward,
 * reload, and scroll. `scroll` targets an element when `selector` is given —
 * either a snapshot ref path or a CSS selector, resolved through
 * `queryElement` like the `dom` tool does — and otherwise scrolls the window
 * to `(x, y)`, each defaulting to 0.
 *
 * `execute` resolves to `{ content }`; an invalid selector or a thrown
 * exception adds `details: { error, action }`.
 *
 * @returns {{ name: string, description: string, schema: object, execute: (params: object) => Promise<object> }}
 */
function createNavigateTool() {
	return {
		name: "navigate",
		description: ["Navigate the current page.", "Actions: goto (open URL), back, forward, reload, scroll (to position or element)."].join(" "),
		schema: Type.Object({
			action: Type.String({ description: "Navigation action: goto | back | forward | reload | scroll" }),
			url: Type.Optional(Type.String({ description: "URL for goto action" })),
			selector: Type.Optional(Type.String({ description: "Element ref path from snapshot or CSS selector for scroll action (scrolls element into view)" })),
			x: Type.Optional(Type.Number({ description: "Horizontal scroll position (pixels)" })),
			y: Type.Optional(Type.Number({ description: "Vertical scroll position (pixels)" }))
		}),
		execute: async (params) => {
			const action = params.action;
			try {
				switch (action) {
					case "goto": {
						const url = params.url;
						if (!url) return { content: "缺少 url 参数" };
						window.location.href = url;
						return { content: `正在导航到 ${url}` };
					}
					case "back":
						window.history.back();
						return { content: "已后退" };
					case "forward":
						window.history.forward();
						return { content: "已前进" };
					case "reload":
						window.location.reload();
						return { content: "正在刷新页面" };
					case "scroll": {
						const selector = params.selector;
						if (selector) {
							// queryElement understands snapshot ref paths as well as CSS
							// selectors and reports failures as a message string.
							const target = queryElement(selector);
							if (typeof target === "string") {
								if (target.startsWith("未找到")) return { content: `未找到元素 "${selector}"` };
								return {
									content: target,
									details: {
										error: true,
										action
									}
								};
							}
							target.scrollIntoView({
								behavior: "smooth",
								block: "center"
							});
							return { content: `已滚动到元素 "${selector}"` };
						}
						const x = params.x ?? 0;
						const y = params.y ?? 0;
						window.scrollTo({
							left: x,
							top: y,
							behavior: "smooth"
						});
						return { content: `已滚动到 (${x}, ${y})` };
					}
					default: return { content: `未知的导航动作: ${action}` };
				}
			} catch (err) {
				return {
					content: `导航操作 "${action}" 失败: ${err instanceof Error ? err.message : String(err)}`,
					details: {
						error: true,
						action
					}
				};
			}
		}
	};
}
|
|
1462
|
+
|
|
1463
|
+
//#endregion
|
|
1464
|
+
//#region src/web/wait-tool.ts
|
|
1465
|
+
/**
|
|
1466
|
+
* Wait Tool — 基于 MutationObserver 的元素等待工具。
|
|
1467
|
+
*
|
|
1468
|
+
* 替代 Playwright 的 waitForSelector/waitForNavigation。
|
|
1469
|
+
* 运行环境:浏览器 Content Script。
|
|
1470
|
+
*
|
|
1471
|
+
* 支持 3 种动作:
|
|
1472
|
+
* wait_for_selector — 等待匹配选择器的元素出现
|
|
1473
|
+
* wait_for_hidden — 等待元素消失或隐藏
|
|
1474
|
+
* wait_for_text — 等待页面中出现指定文本
|
|
1475
|
+
*/
|
|
1476
|
+
/** Default timeout (ms) applied by the wait tool when the caller passes none. */
const DEFAULT_TIMEOUT = 1e4;
/**
 * Wait until an element matching `selector` exists.
 *
 * The element is looked up once immediately; if absent, a MutationObserver on
 * `document.body` re-checks after every child-list or attribute mutation until
 * the element appears or the timeout fires.
 *
 * `selector` may be a CSS selector or a snapshot ref path (starting with "/"),
 * which is resolved through `resolveRef`.
 *
 * @param {string} selector CSS selector or snapshot ref path.
 * @param {number} timeoutMs Maximum time to wait, in milliseconds.
 * @returns {Promise<Element>} Resolves with the element; rejects with an Error
 *   on timeout, or with whatever the initial lookup throws (e.g. an invalid
 *   CSS selector).
 */
function waitForSelector(selector, timeoutMs) {
	const locate = () => typeof selector === "string" && selector.startsWith("/") ? resolveRef(selector) : document.querySelector(selector);
	return new Promise((resolve, reject) => {
		const existing = locate();
		if (existing) {
			resolve(existing);
			return;
		}
		let timer;
		const observer = new MutationObserver(() => {
			const el = locate();
			if (el) {
				clearTimeout(timer);
				observer.disconnect();
				resolve(el);
			}
		});
		// Start observing before arming the timer: if observe() throws, the
		// promise rejects without leaving a timer running.
		observer.observe(document.body, {
			childList: true,
			subtree: true,
			attributes: true
		});
		timer = setTimeout(() => {
			observer.disconnect();
			reject(/* @__PURE__ */ new Error(`等待 "${selector}" 超时 (${timeoutMs}ms)`));
		}, timeoutMs);
	});
}
|
|
1508
|
+
/**
 * Wait until the element matching `selector` is gone or hidden.
 *
 * "Hidden" means the element no longer exists, or its computed style has
 * `display: none` or `visibility: hidden`. The condition is checked once
 * immediately and then after every child-list mutation or change to a
 * `style`, `class` or `hidden` attribute under `document.body`.
 *
 * `selector` may be a CSS selector or a snapshot ref path (starting with "/"),
 * which is resolved through `resolveRef`.
 *
 * @param {string} selector CSS selector or snapshot ref path.
 * @param {number} timeoutMs Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once hidden; rejects with an Error on
 *   timeout, or with whatever the initial lookup throws.
 */
function waitForHidden(selector, timeoutMs) {
	const locate = () => typeof selector === "string" && selector.startsWith("/") ? resolveRef(selector) : document.querySelector(selector);
	const isHidden = () => {
		const el = locate();
		if (!el) return true;
		const style = window.getComputedStyle(el);
		return style.display === "none" || style.visibility === "hidden";
	};
	return new Promise((resolve, reject) => {
		if (isHidden()) {
			resolve();
			return;
		}
		let timer;
		const observer = new MutationObserver(() => {
			if (!isHidden()) return;
			clearTimeout(timer);
			observer.disconnect();
			resolve();
		});
		// Start observing before arming the timer: if observe() throws, the
		// promise rejects without leaving a timer running.
		observer.observe(document.body, {
			childList: true,
			subtree: true,
			attributes: true,
			attributeFilter: [
				"style",
				"class",
				"hidden"
			]
		});
		timer = setTimeout(() => {
			observer.disconnect();
			reject(/* @__PURE__ */ new Error(`等待 "${selector}" 消失超时 (${timeoutMs}ms)`));
		}, timeoutMs);
	});
}
|
|
1554
|
+
/**
 * Wait until `document.body.textContent` contains the given text.
 *
 * The check runs once immediately and then after every child-list or
 * character-data mutation under `document.body`.
 *
 * @param {string} text Text to look for.
 * @param {number} timeoutMs Maximum time to wait, in milliseconds.
 * @returns {Promise<void>} Resolves once the text is present; rejects with an
 *   Error on timeout.
 */
function waitForText(text, timeoutMs) {
	const isPresent = () => document.body.textContent?.includes(text);
	return new Promise((resolve, reject) => {
		if (isPresent()) {
			resolve();
			return;
		}
		let watcher;
		const expiry = setTimeout(() => {
			watcher.disconnect();
			reject(/* @__PURE__ */ new Error(`等待文本 "${text}" 出现超时 (${timeoutMs}ms)`));
		}, timeoutMs);
		watcher = new MutationObserver(() => {
			if (!isPresent()) return;
			clearTimeout(expiry);
			watcher.disconnect();
			resolve();
		});
		watcher.observe(document.body, {
			childList: true,
			subtree: true,
			characterData: true
		});
	});
}
|
|
1581
|
+
/**
 * Create the `wait` tool, which blocks until a DOM condition holds.
 *
 * Supported actions: wait_for_selector, wait_for_hidden (both need
 * `selector`) and wait_for_text (needs `text`). `timeout` falls back to
 * DEFAULT_TIMEOUT when omitted.
 *
 * `execute` resolves to `{ content }`; when the wait rejects (e.g. on
 * timeout) the result additionally carries `details: { error, action }`.
 *
 * @returns {{ name: string, description: string, schema: object, execute: (params: object) => Promise<object> }}
 */
function createWaitTool() {
	// Per action: which parameter it needs, how to wait, and the success message.
	const waiters = new Map([
		["wait_for_selector", {
			requires: "selector",
			wait: (subject, timeoutMs) => waitForSelector(subject, timeoutMs),
			success: (subject) => `元素 "${subject}" 已出现`
		}],
		["wait_for_hidden", {
			requires: "selector",
			wait: (subject, timeoutMs) => waitForHidden(subject, timeoutMs),
			success: (subject) => `元素 "${subject}" 已消失`
		}],
		["wait_for_text", {
			requires: "text",
			wait: (subject, timeoutMs) => waitForText(subject, timeoutMs),
			success: (subject) => `文本 "${subject}" 已出现`
		}]
	]);
	const description = [
		"Wait for DOM changes on the current page.",
		"Actions: wait_for_selector (element appears), wait_for_hidden (element disappears),",
		"wait_for_text (specific text appears in page)."
	].join(" ");
	const schema = Type.Object({
		action: Type.String({ description: "Wait action: wait_for_selector | wait_for_hidden | wait_for_text" }),
		selector: Type.Optional(Type.String({ description: "CSS selector for wait_for_selector/wait_for_hidden" })),
		text: Type.Optional(Type.String({ description: "Text to wait for in wait_for_text" })),
		timeout: Type.Optional(Type.Number({ description: "Timeout in milliseconds (default: 10000)" }))
	});
	return {
		name: "wait",
		description,
		schema,
		execute: async (params) => {
			const action = params.action;
			const timeoutMs = params.timeout ?? DEFAULT_TIMEOUT;
			try {
				const waiter = waiters.get(action);
				if (!waiter) return { content: `未知的等待动作: ${action}` };
				const subject = params[waiter.requires];
				if (!subject) return { content: `缺少 ${waiter.requires} 参数` };
				await waiter.wait(subject, timeoutMs);
				return { content: waiter.success(subject) };
			} catch (err) {
				const reason = err instanceof Error ? err.message : String(err);
				return {
					content: `等待操作 "${action}" 失败: ${reason}`,
					details: {
						error: true,
						action
					}
				};
			}
		}
	};
}
|
|
1632
|
+
|
|
1633
|
+
//#endregion
|
|
1634
|
+
//#region src/web/evaluate-tool.ts
|
|
1635
|
+
/**
|
|
1636
|
+
* Evaluate Tool — 在页面上下文中执行任意 JavaScript 表达式。
|
|
1637
|
+
*
|
|
1638
|
+
* 替代 Playwright 的 page.evaluate()。
|
|
1639
|
+
* 运行环境:浏览器 Content Script。
|
|
1640
|
+
*
|
|
1641
|
+
* 这是最灵活的工具 — 当其他 tools 无法满足需求时,
|
|
1642
|
+
* AI 可以直接编写 JS 代码来操作页面。
|
|
1643
|
+
*
|
|
1644
|
+
* 支持 2 种动作:
|
|
1645
|
+
* evaluate — 执行 JS 表达式并返回结果
|
|
1646
|
+
* evaluate_handle — 执行 JS 并返回序列化的 DOM 信息
|
|
1647
|
+
*/
|
|
1648
|
+
/**
 * Evaluate a JavaScript snippet and capture either its value or its error.
 *
 * The snippet is first compiled as an expression (`return (<snippet>);`). If
 * that does not parse, it is compiled as a statement block instead, in which
 * case the snippet must `return` explicitly to produce a value. Compilation
 * and execution are separated so the snippet runs exactly once: a runtime
 * error in the expression form is reported as-is rather than triggering a
 * second run (which would repeat any side effects).
 *
 * SECURITY: this runs arbitrary code through `new Function`. That is the
 * purpose of the evaluate tool, but callers must treat `expression` as fully
 * trusted code.
 *
 * @param {string} expression - JavaScript expression or statement block.
 * @returns {{ result: unknown } | { error: string }} `result` holds the
 *   returned value; `error` holds the message of a syntax or runtime failure.
 */
function safeEvaluate(expression) {
  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  let run;
  try {
    run = new Function(`"use strict"; return (${expression});`);
  } catch {
    // Not a single expression; retry as a statement block.
    try {
      run = new Function(`"use strict"; ${expression}`);
    } catch (syntaxErr) {
      return { error: describeError(syntaxErr) };
    }
  }

  try {
    return { result: run() };
  } catch (runtimeErr) {
    return { error: describeError(runtimeErr) };
  }
}
|
|
1662
|
+
/**
 * Turn an evaluation result into a string for the tool response.
 *
 * - `undefined` / `null` become the literal words.
 * - An Element becomes `<tag#id> "text"`, with the text trimmed and cut to
 *   100 characters.
 * - A NodeList / HTMLCollection becomes a count line followed by one indexed
 *   line per item.
 * - Everything else is pretty-printed with `JSON.stringify`. Values it cannot
 *   represent (it throws on cycles and BigInt, and returns `undefined` for
 *   functions and symbols) fall back to `String(value)`, so the result is
 *   always a string.
 *
 * @param {unknown} value - Value produced by the evaluated code.
 * @returns {string} Human-readable representation of `value`.
 */
function serializeResult(value) {
  if (value === void 0) return "undefined";
  if (value === null) return "null";
  if (value instanceof Element) {
    const tag = value.tagName.toLowerCase();
    const idSuffix = value.id ? `#${value.id}` : "";
    const preview = value.textContent?.trim().slice(0, 100) ?? "";
    return `<${tag}${idSuffix}> "${preview}"`;
  }
  if (value instanceof NodeList || value instanceof HTMLCollection) {
    const items = Array.from(value, (el, i) => ` ${i}: ${serializeResult(el)}`);
    return `[${value.length} elements]\n${items.join("\n")}`;
  }
  try {
    // JSON.stringify yields undefined (not a string) for functions/symbols.
    return JSON.stringify(value, null, 2) ?? String(value);
  } catch {
    return String(value);
  }
}
|
|
1679
|
+
/**
 * Build the definition of the "evaluate" tool.
 *
 * The executor passes `params.expression` to `safeEvaluate` and returns the
 * serialized value, or an error result (`details.error === true`) when the
 * evaluation failed.
 *
 * @returns {{ name: string, description: string, schema: object, execute: Function }}
 *   Tool definition with name, description, parameter schema and executor.
 */
function createEvaluateTool() {
  return {
    name: "evaluate",
    description: [
      "Execute JavaScript code in the current page context.",
      "Use this when other tools cannot accomplish the task.",
      "Can access document, window, and all page APIs."
    ].join(" "),
    schema: Type.Object({ expression: Type.String({ description: "JavaScript expression or code block to execute. Has access to document, window, etc." }) }),
    execute: async (params) => {
      const expression = params.expression;
      if (!expression) return { content: "缺少 expression 参数" };
      const outcome = safeEvaluate(expression);
      // Test for the key rather than truthiness: a thrown error whose message
      // is empty would otherwise be reported as a successful "undefined".
      if ("error" in outcome) {
        return {
          content: `JS 执行错误: ${outcome.error}`,
          details: {
            error: true,
            expression
          }
        };
      }
      return { content: serializeResult(outcome.result) };
    }
  };
}
|
|
1703
|
+
|
|
1704
|
+
//#endregion
|
|
1705
|
+
//#region src/web/messaging.ts
|
|
1706
|
+
/**
 * Create a proxy executor for use on the Service Worker side.
 *
 * The returned function performs no DOM work itself. It looks up the active
 * tab of the current window and forwards the call to that tab through
 * `chrome.tabs.sendMessage` as an `AUTOPILOT_TOOL_CALL` message, then returns
 * the `result` field of the reply.
 *
 * @returns {(toolName: string, params: object) => Promise<object>} Async
 *   executor. It resolves to an error-style result when there is no active
 *   tab or when sending the message (or reading its reply) fails.
 */
function createProxyExecutor() {
  return async (toolName, params) => {
    // Timestamp plus a short random suffix identifies this call.
    const timestamp = Date.now();
    const suffix = Math.random().toString(36).slice(2, 8);
    const callId = `${toolName}_${timestamp}_${suffix}`;

    const [activeTab] = await chrome.tabs.query({
      active: true,
      currentWindow: true
    });
    if (!activeTab?.id) return { content: "错误:没有活动的浏览器标签页" };

    const request = {
      type: "AUTOPILOT_TOOL_CALL",
      toolName,
      params,
      callId
    };
    try {
      const response = await chrome.tabs.sendMessage(activeTab.id, request);
      return response.result;
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      return {
        content: `工具调用失败(content script 可能未加载): ${reason}`,
        details: {
          error: true,
          toolName
        }
      };
    }
  };
}
|
|
1741
|
+
/**
 * Register the tool-execution handler on the Content Script side.
 *
 * Listens on `chrome.runtime.onMessage` for `AUTOPILOT_TOOL_CALL` messages,
 * looks up the executor for `toolName`, runs it with `params`, and answers
 * with an `AUTOPILOT_TOOL_RESULT` message carrying the same `callId`.
 *
 * Every handled call produces exactly one response: an executor that throws
 * synchronously, rejects, or returns a plain (non-Promise) value is handled
 * the same way as a well-behaved async executor.
 *
 * @param {Map<string, Function>} executors - Maps tool name to its executor
 *   function; only `.get(name)` is used.
 * @returns {void}
 */
function registerToolHandler(executors) {
  chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
    const msg = message;
    // Not ours: returning false tells the runtime no response will follow.
    if (msg?.type !== "AUTOPILOT_TOOL_CALL") return false;

    const reply = (result) => {
      sendResponse({
        type: "AUTOPILOT_TOOL_RESULT",
        callId: msg.callId,
        result
      });
    };

    const executor = executors.get(msg.toolName);
    if (!executor) {
      reply({ content: `未知工具: ${msg.toolName}` });
      return true;
    }

    // The async wrapper turns a synchronous throw into a rejection and a
    // plain return value into a resolved promise, so a reply is always sent.
    (async () => executor(msg.params))()
      .then(reply)
      .catch((err) => {
        reply({
          content: `工具 ${msg.toolName} 执行异常: ${err instanceof Error ? err.message : String(err)}`,
          details: { error: true }
        });
      });
    // Returning true keeps the message channel open for the async reply.
    return true;
  });
}
|
|
1781
|
+
|
|
1782
|
+
//#endregion
|
|
1783
|
+
//#region src/web/index.ts
|
|
1784
|
+
/**
 * WebAgent — browser-side AI agent class.
 *
 * Bundles a complete agent that can run on its own inside the browser:
 * - Chat            → send a message and get the AI reply
 * - Tool registry   → register the built-in web tools or custom tools
 * - Decision loop   → reuses the generic logic in core/agent-loop.ts
 * - AI connection   → reuses core/ai-client.ts (fetch-based, cross-platform)
 *
 * Usage:
 * ```ts
 * const agent = new WebAgent({ token: "ghp_xxx", provider: "copilot" });
 * agent.registerTools(); // register the built-in web tools
 * agent.callbacks.onText = (text) => console.log(text);
 *
 * const result = await agent.chat("Get the page title");
 * console.log(result.reply);
 * ```
 *
 * Where it sits in the architecture:
 * ┌──────────────────────────────────────────────────┐
 * │  WebAgent (browser-side entry point)             │
 * │  ┌──────────┐ ┌────────────┐ ┌───────────────┐   │
 * │  │ core/    │ │ core/      │ │ web/          │   │
 * │  │ ai-client│ │ agent-loop │ │ (DOM/nav/etc.)│   │
 * │  │ (fetch)  │ │ (generic)  │ │               │   │
 * │  └──────────┘ └────────────┘ └───────────────┘   │
 * └──────────────────────────────────────────────────┘
 */
var WebAgent = class {
  /** Caller-supplied AI client instance (takes precedence over token/provider). */
  client;
  token;
  provider;
  model;
  baseURL;
  dryRun;
  maxRounds;
  customSystemPrompt;
  /** Multi-turn conversation memory switch. */
  memory;
  /** Conversation history (replaced after each chat while memory is on). */
  history = [];
  /** Automatic DOM snapshot switch. */
  autoSnapshot;
  /** Options forwarded to the snapshot generator. */
  snapshotOptions;
  /** Tool registry — each WebAgent owns an independent tool set. */
  registry = new ToolRegistry();
  /** Event callbacks — assign handlers to follow agent progress, e.g. for UI. */
  callbacks = {};

  /**
   * @param {object} [options] - Agent configuration. Every field is optional:
   *   `client`, `token`, `provider` (default "copilot"), `model` (default
   *   "gpt-4o"), `baseURL`, `dryRun` (default false), `maxRounds` (default
   *   10), `systemPrompt`, `memory` (default false), `autoSnapshot` (default
   *   true), `snapshotOptions` (default {}). Omitting the argument entirely
   *   yields an agent with all defaults.
   */
  constructor(options = {}) {
    this.client = options.client;
    this.token = options.token || "";
    this.provider = options.provider ?? "copilot";
    this.model = options.model ?? "gpt-4o";
    this.baseURL = options.baseURL;
    this.dryRun = options.dryRun ?? false;
    this.maxRounds = options.maxRounds ?? 10;
    this.customSystemPrompt = options.systemPrompt;
    this.memory = options.memory ?? false;
    this.autoSnapshot = options.autoSnapshot ?? true;
    this.snapshotOptions = options.snapshotOptions ?? {};
  }

  /** Register all built-in web tools (dom, navigate, page_info, wait, evaluate). */
  registerTools() {
    this.registry.register(createDomTool());
    this.registry.register(createNavigateTool());
    this.registry.register(createPageInfoTool());
    this.registry.register(createWaitTool());
    this.registry.register(createEvaluateTool());
  }

  /** Register one custom tool. */
  registerTool(tool) {
    this.registry.register(tool);
  }

  /** Return the definitions of all registered tools. */
  getTools() {
    return this.registry.getDefinitions();
  }

  /** Set the API token. */
  setToken(token) {
    this.token = token;
  }

  /**
   * Set a custom AI client instance.
   *
   * While a client is set, `chat()` uses it and does not build the built-in
   * client from token / provider / model / baseURL. Pass `undefined` to go
   * back to the built-in client.
   */
  setClient(client) {
    this.client = client;
  }

  /** Set the AI provider. */
  setProvider(provider) {
    this.provider = provider;
  }

  /** Set the model. */
  setModel(model) {
    this.model = model;
  }

  /** Toggle dry-run mode. */
  setDryRun(enabled) {
    this.dryRun = enabled;
  }

  /** Set a custom system prompt. */
  setSystemPrompt(prompt) {
    this.customSystemPrompt = prompt;
  }

  /** Turn multi-turn memory on or off; turning it off also clears the history. */
  setMemory(enabled) {
    this.memory = enabled;
    if (!enabled) this.history = [];
  }

  /** Return the current memory switch state. */
  getMemory() {
    return this.memory;
  }

  /** Turn the automatic snapshot on or off. */
  setAutoSnapshot(enabled) {
    this.autoSnapshot = enabled;
  }

  /** Return the current automatic-snapshot switch state. */
  getAutoSnapshot() {
    return this.autoSnapshot;
  }

  /** Set the snapshot options (viewport clipping, smart pruning, etc.). */
  setSnapshotOptions(options) {
    this.snapshotOptions = options;
  }

  /** Return a shallow copy of the current snapshot options. */
  getSnapshotOptions() {
    return { ...this.snapshotOptions };
  }

  /** Clear the conversation history (the memory switch is left unchanged). */
  clearHistory() {
    this.history = [];
  }

  /**
   * Send a message and get the AI reply, including the full tool-call loop.
   *
   * Steps:
   *   1. Pick the custom client, or build the built-in one.
   *   2. Use the custom system prompt, or build one from the registered tools.
   *   3. When autoSnapshot is on, append a DOM snapshot of `document.body`
   *      plus the operating rules to the system prompt.
   *   4. Run `executeAgentLoop` and, when memory is on, keep its messages as
   *      the new history.
   *
   * @param {string} message - User message.
   * @returns {Promise<object>} The value returned by `executeAgentLoop`.
   * @throws {Error} When no custom client is set and no token is configured.
   */
  async chat(message) {
    const client = this.client ?? this.createBuiltinClient();
    let systemPrompt = this.customSystemPrompt ?? buildSystemPrompt({ tools: this.registry.getDefinitions() });
    if (this.autoSnapshot) {
      try {
        const snapshot = generateSnapshot(document.body, {
          maxDepth: 8,
          ...this.snapshotOptions
        });
        this.callbacks.onSnapshot?.(snapshot);
        systemPrompt += [
          "\n\n## 当前页面 DOM 快照(实时生成)\n",
          "每个元素末尾的 ref=\"...\" 是基于层级位置生成的唯一路径。",
          "操作元素时,必须使用 ref 路径作为 selector 参数(如 /body/main/form/input)。\n",
          "```",
          snapshot,
          "```\n",
          "## 操作规则\n",
          "1. 从快照中找到目标元素,复制其 ref 路径。",
          "2. 将 ref 路径作为 dom 工具的 selector 参数传入。",
          "3. 禁止猜测 CSS 选择器(如 \"button\"、\"#send\"),必须使用快照中的 ref。",
          "4. 如果快照中看不到目标元素,先滚动页面或调整 maxDepth 获取更深的快照。",
          "5. 先规划操作步骤,再按顺序逐步执行。",
          "6. **语义选择**:当页面存在多个文本相同的元素时,严禁随意选择。必须根据 ref 路径的父级结构、周围兄弟元素、所在功能区域来判断哪个才是用户意图中的目标。例如聊天区的\"发送\"和表单的\"提交\"是不同功能,要结合用户当前操作的上下文来选择正确的元素。"
        ].join("\n");
      } catch {
        // Best-effort: any failure while generating or reporting the snapshot
        // is ignored and the chat proceeds without the snapshot section.
      }
    }
    const result = await executeAgentLoop({
      client,
      registry: this.registry,
      systemPrompt,
      message,
      history: this.memory ? this.history : void 0,
      dryRun: this.dryRun,
      maxRounds: this.maxRounds,
      callbacks: this.callbacks
    });
    if (this.memory) this.history = result.messages;
    return result;
  }

  /**
   * Create the built-in AI client from the token / provider / model / baseURL
   * configuration.
   *
   * @throws {Error} When no token has been set.
   */
  createBuiltinClient() {
    if (!this.token) throw new Error("未设置 Token,请先调用 setToken() 或传入自定义 client");
    return createAIClient({
      provider: this.provider,
      model: this.model,
      apiKey: this.token,
      baseURL: this.baseURL
    });
  }
};
|
|
1984
|
+
|
|
1985
|
+
//#endregion
|
|
1986
|
+
export { WebAgent, createDomTool, createEvaluateTool, createNavigateTool, createPageInfoTool, createProxyExecutor, createWaitTool, generateSnapshot, registerToolHandler };
|
|
1987
|
+
//# sourceMappingURL=index.mjs.map
|