agentpage 0.0.46 → 0.0.50
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/dist/index.mjs +436 -32
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -9,7 +9,10 @@
|
|
|
9
9
|
> 核心主张:通过 **Prompt + Tools + 路由**,快速为网站实现 AI 赋能,并构建**前端运行时 AI Skill**。AutoPilot 本质上是一个运行在前端浏览器中的 AI Agent。
|
|
10
10
|
|
|
11
11
|
[](LICENSE)
|
|
12
|
+
<a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/language-TypeScript-3178C6" alt="TypeScript" /></a>
|
|
12
13
|
<a href="https://www.npmjs.com/package/agentpage"><img src="https://img.shields.io/npm/v/agentpage" alt="npm" /></a>
|
|
14
|
+
<a href="https://www.npmjs.com/package/agentpage"><img src="https://img.shields.io/npm/dm/agentpage" alt="downloads" /></a>
|
|
15
|
+
<a href="https://bundlephobia.com/package/agentpage"><img src="https://img.shields.io/bundlephobia/minzip/agentpage" alt="minzipped size" /></a>
|
|
13
16
|
|
|
14
17
|
AutoPilot 的目标不是生成文本,而是在浏览器中完成真实任务:点击、填写、导航、等待、执行脚本,并在每一轮根据最新页面状态持续推进。
|
|
15
18
|
|
|
@@ -76,7 +79,7 @@ npm install agentpage
|
|
|
76
79
|
|
|
77
80
|
- **Prompt + Tools + 路由三层解耦**:可以快速把"可执行 AI 能力"植入现有前端系统,按路由渐进式接入,支持"项目级工具 + 路由级工具"组合。
|
|
78
81
|
- **增量任务消费协议(REMAINING)**:任务不是一次性执行,而是逐轮消费收敛。每轮只做当前快照可执行的动作,通过 `REMAINING` 协议跟踪进度,支持协议修复和启发式回退,确保复杂多步任务稳定收敛。
|
|
79
|
-
- **
|
|
82
|
+
- **9 层保护机制**:冗余拦截、快照防抖、元素恢复、Not-found 重试对话流、导航刷新、空转检测、重复批次防自转、协议修复、快照指纹变化检测 —— 目标是**稳定收敛**,而不是偶然成功。
|
|
80
83
|
- **Playwright 级别交互语义**:完整 pointer/mouse 事件链、4 种 scrollIntoView 策略轮换、actionability 五重检查(可见/稳定/可用/可编辑/遮挡)、智能重定向 retarget、隐藏控件代理点击(ElementPlus/AntD)、`select_option` value/label/index 三策略。
|
|
81
84
|
- **运行时事件信号追踪**:通过 `EventTarget.prototype` 补丁全局追踪事件绑定,快照中输出 `listeners="clk,inp,chg"` 信号,帮助 AI 精准识别真实可交互元素,而非猜测。
|
|
82
85
|
- **效果验证机制(Effect Check)**:每轮行动前自动检查上轮操作是否在当前快照中生效,未生效则尝试邻近元素,避免重复点击无效目标。
|
|
@@ -229,6 +232,7 @@ Round 3: 执行 C → REMAINING: DONE
|
|
|
229
232
|
| 重复批次防自转 | 连续两轮相同任务批次 | 直接终止 |
|
|
230
233
|
| 协议修复回合 | remaining 未完成却无工具调用 | 注入强约束提示 |
|
|
231
234
|
| 轮次稳定等待 | 本轮有 DOM 变化动作 | loading hidden + DOM quiet(200ms/4s) |
|
|
235
|
+
| 快照指纹变化检测 | 本轮有 DOM 变更动作且行动后指纹不变 | 注入 `Snapshot unchanged` 提示,强制模型换目标 |
|
|
232
236
|
|
|
233
237
|
### 5. 停机条件
|
|
234
238
|
|
package/dist/index.mjs
CHANGED
|
@@ -88,6 +88,29 @@ function extractHashSelectorRef(toolInput) {
|
|
|
88
88
|
return m ? m[1] : null;
|
|
89
89
|
}
|
|
90
90
|
/**
|
|
91
|
+
* 快照指纹计算 — 用于轮次间快照变化检测。
|
|
92
|
+
*
|
|
93
|
+
* 元素的 #hashID(如 `#1kry9hw`)可能因 DOM 重新渲染而变化,
|
|
94
|
+
* 但页面实际内容并未改变。因此先将 hashID 替换为占位符 `#_`,
|
|
95
|
+
* 再计算 djb2 哈希,确保指纹只反映真实页面结构和文本差异。
|
|
96
|
+
*
|
|
97
|
+
* 用途:轮次行动前后各算一次指纹,若一致说明操作未产生任何可见效果。
|
|
98
|
+
*/
|
|
99
|
+
function computeSnapshotFingerprint(snapshot) {
|
|
100
|
+
if (!snapshot) return "";
|
|
101
|
+
return _djb2(snapshot.replace(/#[a-z0-9]{4,}/gi, "#_"));
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* djb2 字符串哈希(非加密)。
|
|
105
|
+
*
|
|
106
|
+
* 纯粹用于快照指纹比对,不用于安全场景。
|
|
107
|
+
*/
|
|
108
|
+
function _djb2(str) {
|
|
109
|
+
let hash = 5381;
|
|
110
|
+
for (let i = 0; i < str.length; i++) hash = (hash << 5) + hash + str.charCodeAt(i) | 0;
|
|
111
|
+
return (hash >>> 0).toString(36);
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
91
114
|
* 构建任务数组。
|
|
92
115
|
*
|
|
93
116
|
* 作用:把一轮工具调用规整成稳定字符串数组,
|
|
@@ -542,7 +565,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
542
565
|
];
|
|
543
566
|
if (hasErrors) contextParts.push("", "Last step failed. Retry differently or skip to other targets.");
|
|
544
567
|
else contextParts.push("", "If fully done, reply summary only (no tools).");
|
|
545
|
-
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous executed:", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`), "If any had no visible effect, try a
|
|
568
|
+
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous executed:", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`), "If any had no visible effect (snapshot unchanged), do NOT repeat — try a child <a>/<button> inside the target, or a sibling/parent with stronger click signal.");
|
|
546
569
|
if (previousRoundPlannedTasks && previousRoundPlannedTasks.length > 0) contextParts.push("", "Previous planned:", ...previousRoundPlannedTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
547
570
|
if (previousRoundModelOutput) contextParts.push("", "Previous model output:", previousRoundModelOutput);
|
|
548
571
|
const lastEntry = trace[trace.length - 1];
|
|
@@ -894,6 +917,7 @@ async function executeAgentLoop(params) {
|
|
|
894
917
|
callbacks?.onRound?.(round);
|
|
895
918
|
usedRounds = round + 1;
|
|
896
919
|
if (!pageContext.latestSnapshot) await refreshSnapshot();
|
|
920
|
+
const roundStartFingerprint = computeSnapshotFingerprint(pageContext.latestSnapshot || "");
|
|
897
921
|
const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
|
|
898
922
|
const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint);
|
|
899
923
|
if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
|
|
@@ -972,10 +996,12 @@ async function executeAgentLoop(params) {
|
|
|
972
996
|
}
|
|
973
997
|
if (consecutiveSamePlannedBatch >= 2 && !lastRoundHadError) protocolViolationHint = [
|
|
974
998
|
"Repeated action warning:",
|
|
975
|
-
"- You performed the EXACT same tool call(s) as the previous round, but
|
|
976
|
-
"This round you MUST do ONE of:",
|
|
977
|
-
"1)
|
|
978
|
-
"2)
|
|
999
|
+
"- You performed the EXACT same tool call(s) as the previous round, but NO visible change occurred in the snapshot.",
|
|
1000
|
+
"The clicked element did not trigger navigation or DOM change. This round you MUST do ONE of:",
|
|
1001
|
+
"1) Look INSIDE the clicked container for an <a> link, <button>, or child element with clk/pdn/mdn listener, and click THAT instead;",
|
|
1002
|
+
"2) Try a parent or sibling element with stronger click signal (clk/pdn/mdn listener);",
|
|
1003
|
+
"3) Try a completely different approach (e.g., search, filter, or navigate via sidebar);",
|
|
1004
|
+
"4) If the task is truly complete, return REMAINING: DONE with no tool calls.",
|
|
979
1005
|
"Do NOT repeat the same action again."
|
|
980
1006
|
].join("\n");
|
|
981
1007
|
if (dryRun) {
|
|
@@ -1061,16 +1087,6 @@ async function executeAgentLoop(params) {
|
|
|
1061
1087
|
if (finalReply) callbacks?.onText?.(finalReply);
|
|
1062
1088
|
break;
|
|
1063
1089
|
}
|
|
1064
|
-
if (consecutiveNoProtocolRounds >= 5) {
|
|
1065
|
-
finalReply = response.text?.trim() || "任务已完成。";
|
|
1066
|
-
if (finalReply) callbacks?.onText?.(finalReply);
|
|
1067
|
-
break;
|
|
1068
|
-
}
|
|
1069
|
-
if (consecutiveNoProtocolRounds >= 3) protocolViolationHint = [
|
|
1070
|
-
"Protocol reminder: REMAINING protocol missing for 3+ rounds with tool calls.",
|
|
1071
|
-
"You MUST include REMAINING: <text> or REMAINING: DONE in every response.",
|
|
1072
|
-
"If the task is fully complete, return REMAINING: DONE with no tool calls."
|
|
1073
|
-
].join("\n");
|
|
1074
1090
|
const idleResult = detectIdleLoop(executedTaskCalls, consecutiveReadOnlyRounds);
|
|
1075
1091
|
if (idleResult === -1) {
|
|
1076
1092
|
finalReply = response.text?.trim() || "任务已完成。";
|
|
@@ -1080,6 +1096,31 @@ async function executeAgentLoop(params) {
|
|
|
1080
1096
|
consecutiveReadOnlyRounds = idleResult;
|
|
1081
1097
|
if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
|
|
1082
1098
|
await refreshSnapshot();
|
|
1099
|
+
if (roundHasPotentialDomMutation) {
|
|
1100
|
+
const roundEndFingerprint = computeSnapshotFingerprint(pageContext.latestSnapshot || "");
|
|
1101
|
+
if (roundEndFingerprint === roundStartFingerprint && roundStartFingerprint !== "") {
|
|
1102
|
+
const unchangedHint = [
|
|
1103
|
+
"Snapshot unchanged after action:",
|
|
1104
|
+
"- The page snapshot is IDENTICAL before and after your action(s) this round.",
|
|
1105
|
+
"- Your click/action had NO visible effect on the page. Do NOT repeat it.",
|
|
1106
|
+
"- Look INSIDE the target for <a>/<button>/child with clk listener, or try a parent/sibling with stronger signal, or use a completely different approach."
|
|
1107
|
+
].join("\n");
|
|
1108
|
+
protocolViolationHint = protocolViolationHint ? protocolViolationHint + "\n\n" + unchangedHint : unchangedHint;
|
|
1109
|
+
} else if (roundEndFingerprint !== roundStartFingerprint) consecutiveNoProtocolRounds = 0;
|
|
1110
|
+
}
|
|
1111
|
+
if (consecutiveNoProtocolRounds >= 5) {
|
|
1112
|
+
finalReply = response.text?.trim() || "任务已完成。";
|
|
1113
|
+
if (finalReply) callbacks?.onText?.(finalReply);
|
|
1114
|
+
break;
|
|
1115
|
+
}
|
|
1116
|
+
if (consecutiveNoProtocolRounds >= 3) {
|
|
1117
|
+
const noProtocolHint = [
|
|
1118
|
+
"Protocol reminder: REMAINING protocol missing for 3+ rounds with tool calls.",
|
|
1119
|
+
"You MUST include REMAINING: <text> or REMAINING: DONE in every response.",
|
|
1120
|
+
"If the task is fully complete, return REMAINING: DONE with no tool calls."
|
|
1121
|
+
].join("\n");
|
|
1122
|
+
protocolViolationHint = protocolViolationHint ? protocolViolationHint + "\n\n" + noProtocolHint : noProtocolHint;
|
|
1123
|
+
}
|
|
1083
1124
|
}
|
|
1084
1125
|
const resultMessages = [...history ?? [], {
|
|
1085
1126
|
role: "user",
|
|
@@ -1252,12 +1293,33 @@ var BaseAIClient = class {
|
|
|
1252
1293
|
};
|
|
1253
1294
|
|
|
1254
1295
|
//#endregion
|
|
1255
|
-
//#region src/core/ai-client/openai.ts
|
|
1296
|
+
//#region src/core/ai-client/models/openai.ts
|
|
1297
|
+
/** 单次请求默认超时时间(毫秒) */
|
|
1256
1298
|
const DEFAULT_REQUEST_TIMEOUT_MS = 45e3;
|
|
1299
|
+
/** JSON(非流式)模式超时后的最大重试次数 */
|
|
1257
1300
|
const JSON_TIMEOUT_RETRY_COUNT = 1;
|
|
1301
|
+
/**
|
|
1302
|
+
* 判断错误是否为请求超时错误(中)/ Check if error is a request timeout (EN).
|
|
1303
|
+
* 仅匹配 `fetchWithTimeout` 抛出的特定格式错误消息。
|
|
1304
|
+
*/
|
|
1258
1305
|
function isRequestTimeoutError(error) {
|
|
1259
1306
|
return error instanceof Error && /^AI request timeout \(\d+ms\)$/.test(error.message);
|
|
1260
1307
|
}
|
|
1308
|
+
/**
|
|
1309
|
+
* 带超时的 fetch 封装(中)/ Fetch wrapper with AbortController-based timeout (EN).
|
|
1310
|
+
*
|
|
1311
|
+
* 工作原理:
|
|
1312
|
+
* 1. 创建 AbortController,设置 setTimeout 在超时后调用 controller.abort()
|
|
1313
|
+
* 2. 将 controller.signal 注入 fetch 请求
|
|
1314
|
+
* 3. 若 fetch 被 abort,捕获 AbortError 并转换为语义明确的超时错误
|
|
1315
|
+
* 4. 无论成功或失败,finally 中清除定时器避免泄漏
|
|
1316
|
+
*
|
|
1317
|
+
* @param input - 请求 URL 或 Request 对象
|
|
1318
|
+
* @param init - fetch 请求配置
|
|
1319
|
+
* @param timeoutMs - 超时毫秒数(<=0 或非有限数时不设超时)
|
|
1320
|
+
* @returns fetch Response
|
|
1321
|
+
* @throws Error - 超时时抛出 "AI request timeout (Xms)"
|
|
1322
|
+
*/
|
|
1261
1323
|
async function fetchWithTimeout(input, init, timeoutMs) {
|
|
1262
1324
|
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return fetch(input, init);
|
|
1263
1325
|
const controller = new AbortController();
|
|
@@ -1275,11 +1337,30 @@ async function fetchWithTimeout(input, init, timeoutMs) {
|
|
|
1275
1337
|
}
|
|
1276
1338
|
}
|
|
1277
1339
|
/**
|
|
1278
|
-
* OpenAIClient 类(中)/
|
|
1340
|
+
* OpenAIClient 类(中)/ OpenAI & Copilot client implementation (EN).
|
|
1341
|
+
*
|
|
1342
|
+
* 继承 BaseAIClient,通过注入 chatHandler 实现 OpenAI Chat Completions 协议。
|
|
1343
|
+
* 根据 `config.stream`(默认 true)自动选择流式(SSE)或非流式(JSON)模式。
|
|
1344
|
+
*
|
|
1345
|
+
* 非流式模式特性:
|
|
1346
|
+
* - 使用 `fetchWithTimeout` + AbortController 实现请求级超时
|
|
1347
|
+
* - 超时后自动重试 1 次(`JSON_TIMEOUT_RETRY_COUNT`),应对单次网络抖动
|
|
1348
|
+
* - 非超时错误不重试,直接抛出
|
|
1349
|
+
*
|
|
1350
|
+
* 流式模式特性:
|
|
1351
|
+
* - 通过 SSE 逐 chunk 接收 delta 内容和工具调用片段
|
|
1352
|
+
* - 若服务端返回 application/json(如模型降级),自动回退为 JSON 解析
|
|
1353
|
+
* - 单次 chunk 读取超时 20s(`readTimeoutMs`)
|
|
1354
|
+
*
|
|
1355
|
+
* 本类也是 DeepSeek / Doubao / Qwen / MiniMax 等兼容 provider 的基类。
|
|
1279
1356
|
*/
|
|
1280
1357
|
var OpenAIClient = class extends BaseAIClient {
|
|
1281
1358
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
1282
1359
|
config;
|
|
1360
|
+
/**
|
|
1361
|
+
* 构造 OpenAIClient 实例。
|
|
1362
|
+
* @param config - AI 客户端配置,需包含 provider / model / apiKey
|
|
1363
|
+
*/
|
|
1283
1364
|
constructor(config) {
|
|
1284
1365
|
super({ chatHandler: async (params) => {
|
|
1285
1366
|
const req = buildOpenAIRequest(this.config, params);
|
|
@@ -1320,7 +1401,48 @@ var OpenAIClient = class extends BaseAIClient {
|
|
|
1320
1401
|
}
|
|
1321
1402
|
};
|
|
1322
1403
|
/**
|
|
1323
|
-
* 构建 OpenAI 请求(中)/ Build OpenAI chat request payload (EN).
|
|
1404
|
+
* 构建 OpenAI Chat Completions 请求(中)/ Build OpenAI chat request payload (EN).
|
|
1405
|
+
*
|
|
1406
|
+
* 将统一的 ChatParams 转换为 OpenAI 协议格式:
|
|
1407
|
+
* - system prompt 作为 messages 数组首条 system 消息
|
|
1408
|
+
* - 工具定义转换为 `{ type: "function", function: { name, description, parameters } }` 格式
|
|
1409
|
+
* - 流式模式设置 `stream: true` + `stream_options: { include_usage: true }` 获取用量
|
|
1410
|
+
* - 有工具时设置 `tool_choice: "auto"` + `parallel_tool_calls`(默认允许并行)
|
|
1411
|
+
* - temperature 固定 0.3(偏确定性,适合工具调用场景)
|
|
1412
|
+
* - 认证使用 `Authorization: Bearer <apiKey>`
|
|
1413
|
+
*
|
|
1414
|
+
* 构建后的请求体示例(流式 + 含工具):
|
|
1415
|
+
* ```json
|
|
1416
|
+
* // POST https://api.openai.com/v1/chat/completions
|
|
1417
|
+
* // Headers: { "Authorization": "Bearer sk-xxx", "Content-Type": "application/json" }
|
|
1418
|
+
* {
|
|
1419
|
+
* "model": "gpt-4o",
|
|
1420
|
+
* "messages": [
|
|
1421
|
+
* { "role": "system", "content": "You are a browser automation agent..." },
|
|
1422
|
+
* { "role": "user", "content": "Click the submit button" }
|
|
1423
|
+
* ],
|
|
1424
|
+
* "tools": [
|
|
1425
|
+
* {
|
|
1426
|
+
* "type": "function",
|
|
1427
|
+
* "function": {
|
|
1428
|
+
* "name": "dom",
|
|
1429
|
+
* "description": "DOM interaction tool. Actions: click, fill, ...",
|
|
1430
|
+
* "parameters": { "type": "object", "properties": { "action": { ... }, "selector": { ... } } }
|
|
1431
|
+
* }
|
|
1432
|
+
* }
|
|
1433
|
+
* ],
|
|
1434
|
+
* "tool_choice": "auto",
|
|
1435
|
+
* "parallel_tool_calls": true,
|
|
1436
|
+
* "temperature": 0.3,
|
|
1437
|
+
* "max_tokens": 4096,
|
|
1438
|
+
* "stream": true,
|
|
1439
|
+
* "stream_options": { "include_usage": true }
|
|
1440
|
+
* }
|
|
1441
|
+
* ```
|
|
1442
|
+
*
|
|
1443
|
+
* @param config - AI 客户端配置
|
|
1444
|
+
* @param params - 统一聊天参数(systemPrompt / messages / tools)
|
|
1445
|
+
* @returns 构建好的 HTTP 请求对象(url / method / headers / body)
|
|
1324
1446
|
*/
|
|
1325
1447
|
function buildOpenAIRequest(config, params) {
|
|
1326
1448
|
const baseURL = resolveBaseURL(config);
|
|
@@ -1360,7 +1482,43 @@ function buildOpenAIRequest(config, params) {
|
|
|
1360
1482
|
};
|
|
1361
1483
|
}
|
|
1362
1484
|
/**
|
|
1363
|
-
* 解析 OpenAI
|
|
1485
|
+
* 解析 OpenAI JSON 响应(中)/ Parse raw OpenAI JSON response into unified AIChatResponse (EN).
|
|
1486
|
+
*
|
|
1487
|
+
* 从 choices[0].message 中提取文本和工具调用,并映射 usage 统计。
|
|
1488
|
+
* 工具调用的 `arguments` 字段为 JSON 字符串,此处解析为对象。
|
|
1489
|
+
*
|
|
1490
|
+
* 输入示例(非流式 JSON 响应):
|
|
1491
|
+
* ```json
|
|
1492
|
+
* {
|
|
1493
|
+
* "choices": [{
|
|
1494
|
+
* "message": {
|
|
1495
|
+
* "content": null,
|
|
1496
|
+
* "tool_calls": [{
|
|
1497
|
+
* "id": "call_abc123",
|
|
1498
|
+
* "type": "function",
|
|
1499
|
+
* "function": {
|
|
1500
|
+
* "name": "dom",
|
|
1501
|
+
* "arguments": "{\"action\":\"click\",\"selector\":\"#submit-btn\"}"
|
|
1502
|
+
* }
|
|
1503
|
+
* }]
|
|
1504
|
+
* }
|
|
1505
|
+
* }],
|
|
1506
|
+
* "usage": { "prompt_tokens": 1200, "completion_tokens": 45 }
|
|
1507
|
+
* }
|
|
1508
|
+
* ```
|
|
1509
|
+
*
|
|
1510
|
+
* 输出(统一 AIChatResponse):
|
|
1511
|
+
* ```js
|
|
1512
|
+
* {
|
|
1513
|
+
* "text": undefined,
|
|
1514
|
+
* "toolCalls": [{ "id": "call_abc123", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
|
|
1515
|
+
* "usage": { "inputTokens": 1200, "outputTokens": 45 }
|
|
1516
|
+
* }
|
|
1517
|
+
* ```
|
|
1518
|
+
*
|
|
1519
|
+
* @param data - OpenAI API 返回的原始 JSON 对象
|
|
1520
|
+
* @returns 统一的 AIChatResponse
|
|
1521
|
+
* @throws Error - choices 为空时抛出 "AI 未返回有效响应"
|
|
1364
1522
|
*/
|
|
1365
1523
|
function parseOpenAIResponse(data) {
|
|
1366
1524
|
const d = data;
|
|
@@ -1382,7 +1540,42 @@ function parseOpenAIResponse(data) {
|
|
|
1382
1540
|
};
|
|
1383
1541
|
}
|
|
1384
1542
|
/**
|
|
1385
|
-
*
|
|
1543
|
+
* 统一消息格式转换为 OpenAI 格式(中)/ Convert unified AIMessage[] to OpenAI message format (EN).
|
|
1544
|
+
*
|
|
1545
|
+
* 转换规则:
|
|
1546
|
+
* - system prompt → 首条 `{ role: "system", content }` 消息
|
|
1547
|
+
* - `tool` 角色 → 展开为多条 `{ role: "tool", tool_call_id, content }` 消息(OpenAI 要求每个结果独立一条)
|
|
1548
|
+
* - `assistant` 带 toolCalls → `{ role: "assistant", content, tool_calls }` 消息
|
|
1549
|
+
* - 其他(user / assistant 纯文本)→ 直接映射
|
|
1550
|
+
*
|
|
1551
|
+
* 输入示例(统一 AIMessage[]):
|
|
1552
|
+
* ```ts
|
|
1553
|
+
* messages = [
|
|
1554
|
+
* { role: "user", content: "帮我点击提交按钮" },
|
|
1555
|
+
* { role: "assistant", content: "好的", toolCalls: [
|
|
1556
|
+
* { id: "call_abc", name: "dom", input: { action: "click", selector: "#btn" } }
|
|
1557
|
+
* ]},
|
|
1558
|
+
* { role: "tool", content: [
|
|
1559
|
+
* { toolCallId: "call_abc", result: "点击成功" }
|
|
1560
|
+
* ]}
|
|
1561
|
+
* ]
|
|
1562
|
+
* ```
|
|
1563
|
+
*
|
|
1564
|
+
* 输出示例(OpenAI 格式):
|
|
1565
|
+
* ```json
|
|
1566
|
+
* [
|
|
1567
|
+
* { "role": "system", "content": "You are a browser automation agent..." },
|
|
1568
|
+
* { "role": "user", "content": "帮我点击提交按钮" },
|
|
1569
|
+
* { "role": "assistant", "content": "好的", "tool_calls": [
|
|
1570
|
+
* { "id": "call_abc", "type": "function", "function": { "name": "dom", "arguments": "{\"action\":\"click\",\"selector\":\"#btn\"}" } }
|
|
1571
|
+
* ]},
|
|
1572
|
+
* { "role": "tool", "tool_call_id": "call_abc", "content": "点击成功" }
|
|
1573
|
+
* ]
|
|
1574
|
+
* ```
|
|
1575
|
+
*
|
|
1576
|
+
* @param systemPrompt - 系统提示词
|
|
1577
|
+
* @param messages - 统一消息列表
|
|
1578
|
+
* @returns OpenAI 格式的消息数组
|
|
1386
1579
|
*/
|
|
1387
1580
|
function convertMessages$1(systemPrompt, messages) {
|
|
1388
1581
|
const result = [{
|
|
@@ -1413,7 +1606,52 @@ function convertMessages$1(systemPrompt, messages) {
|
|
|
1413
1606
|
return result;
|
|
1414
1607
|
}
|
|
1415
1608
|
/**
|
|
1416
|
-
* 解析 OpenAI SSE
|
|
1609
|
+
* 解析 OpenAI SSE 流式响应(中)/ Parse OpenAI SSE stream into unified AIChatResponse (EN).
|
|
1610
|
+
*
|
|
1611
|
+
* 工作原理:
|
|
1612
|
+
* 1. 通过 `consumeSSEJSON` 逐 chunk 消费 SSE 事件
|
|
1613
|
+
* 2. 文本内容(`delta.content`)逐 chunk 拼接为完整字符串
|
|
1614
|
+
* 3. 工具调用按 `delta.tool_calls[].index` 累积:
|
|
1615
|
+
* - 首个 delta 包含 id 和 name
|
|
1616
|
+
* - 后续 delta 只包含 arguments 片段,需要拼接
|
|
1617
|
+
* - 最终按 index 排序,逐个 JSON.parse 解析 arguments
|
|
1618
|
+
* 4. usage 信息来自最终 chunk(需 `stream_options.include_usage=true`)
|
|
1619
|
+
* 5. 遇到 `[DONE]` 信号自动结束(`stopOnDone: true`)
|
|
1620
|
+
*
|
|
1621
|
+
* SSE 流示例(含工具调用):
|
|
1622
|
+
* ```
|
|
1623
|
+
* data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_abc","function":{"name":"dom","arguments":""}}]}}]}
|
|
1624
|
+
* data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"action"}}]}}]}
|
|
1625
|
+
* data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\"click\",\"selector"}}]}}]}
|
|
1626
|
+
* data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\"#submit-btn\"}"}}]}}]}
|
|
1627
|
+
* data: {"choices":[{"delta":{}}],"usage":{"prompt_tokens":1200,"completion_tokens":45}}
|
|
1628
|
+
* data: [DONE]
|
|
1629
|
+
* ```
|
|
1630
|
+
*
|
|
1631
|
+
* SSE 流示例(纯文本):
|
|
1632
|
+
* ```
|
|
1633
|
+
* data: {"choices":[{"delta":{"content":"我"}}]}
|
|
1634
|
+
* data: {"choices":[{"delta":{"content":"已经"}}]}
|
|
1635
|
+
* data: {"choices":[{"delta":{"content":"完成了"}}]}
|
|
1636
|
+
* data: {"choices":[{"delta":{"content":"任务。"}}]}
|
|
1637
|
+
* data: {"choices":[{"delta":{}}],"usage":{"prompt_tokens":800,"completion_tokens":12}}
|
|
1638
|
+
* data: [DONE]
|
|
1639
|
+
* ```
|
|
1640
|
+
*
|
|
1641
|
+
* 最终输出(统一 AIChatResponse):
|
|
1642
|
+
* ```js
|
|
1643
|
+
* {
|
|
1644
|
+
* "text": "我已经完成了任务。",
|
|
1645
|
+
* "toolCalls": undefined,
|
|
1646
|
+
* "usage": { "inputTokens": 800, "outputTokens": 12 }
|
|
1647
|
+
* }
|
|
1648
|
+
* ```
|
|
1649
|
+
*
|
|
1650
|
+
* 回退:若 response.body 不可用(无 ReadableStream 支持),回退为 JSON 解析。
|
|
1651
|
+
*
|
|
1652
|
+
* @param response - OpenAI API 的流式 HTTP 响应
|
|
1653
|
+
* @param readTimeoutMs - 单次 chunk 读取超时(毫秒,默认 20000)
|
|
1654
|
+
* @returns 统一的 AIChatResponse(文本 + 工具调用 + usage)
|
|
1417
1655
|
*/
|
|
1418
1656
|
async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
|
|
1419
1657
|
if (!response.body) return parseOpenAIResponse(await response.json());
|
|
@@ -1459,13 +1697,22 @@ async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
|
|
|
1459
1697
|
}
|
|
1460
1698
|
|
|
1461
1699
|
//#endregion
|
|
1462
|
-
//#region src/core/ai-client/anthropic.ts
|
|
1700
|
+
//#region src/core/ai-client/models/anthropic.ts
|
|
1463
1701
|
/**
|
|
1464
1702
|
* AnthropicClient 类(中)/ AnthropicClient class (EN).
|
|
1703
|
+
*
|
|
1704
|
+
* 继承 BaseAIClient,通过注入 chatHandler 实现 Anthropic Messages API 的请求与响应处理。
|
|
1705
|
+
* 根据 `config.stream`(默认 true)自动选择 SSE 流式 或 JSON 非流式模式。
|
|
1706
|
+
*
|
|
1707
|
+
* 流式模式下,若服务端返回 `application/json`(如模型降级),自动回退为 JSON 解析。
|
|
1465
1708
|
*/
|
|
1466
1709
|
var AnthropicClient = class extends BaseAIClient {
|
|
1467
1710
|
/** AI 客户端配置(provider / model / apiKey / baseURL) */
|
|
1468
1711
|
config;
|
|
1712
|
+
/**
|
|
1713
|
+
* 构造 AnthropicClient 实例。
|
|
1714
|
+
* @param config - AI 客户端配置,需包含 provider="anthropic"、model、apiKey
|
|
1715
|
+
*/
|
|
1469
1716
|
constructor(config) {
|
|
1470
1717
|
super({ chatHandler: async (params) => {
|
|
1471
1718
|
const req = buildAnthropicRequest(this.config, params);
|
|
@@ -1497,7 +1744,39 @@ var AnthropicClient = class extends BaseAIClient {
|
|
|
1497
1744
|
}
|
|
1498
1745
|
};
|
|
1499
1746
|
/**
|
|
1500
|
-
* 构建 Anthropic 请求(中)/ Build Anthropic Messages API request (EN).
|
|
1747
|
+
* 构建 Anthropic Messages API 请求(中)/ Build Anthropic Messages API request payload (EN).
|
|
1748
|
+
*
|
|
1749
|
+
* 将统一的 ChatParams 转换为 Anthropic 协议格式:
|
|
1750
|
+
* - system prompt 放顶层 `system` 字段(非消息数组)
|
|
1751
|
+
* - 工具定义使用 `input_schema`(非 `parameters`)
|
|
1752
|
+
* - max_tokens 根据模型名自动调整(opus 系列 16384,其他 8192)
|
|
1753
|
+
* - 认证头使用 `x-api-key` + `anthropic-version`
|
|
1754
|
+
*
|
|
1755
|
+
* 构建后的请求体示例(流式 + 含工具):
|
|
1756
|
+
* ```json
|
|
1757
|
+
* // POST https://api.anthropic.com/v1/messages
|
|
1758
|
+
* // Headers: { "x-api-key": "sk-ant-xxx", "anthropic-version": "2023-06-01", "Content-Type": "application/json" }
|
|
1759
|
+
* {
|
|
1760
|
+
* "model": "claude-sonnet-4-20250514",
|
|
1761
|
+
* "max_tokens": 8192,
|
|
1762
|
+
* "system": "You are a browser automation agent...",
|
|
1763
|
+
* "messages": [
|
|
1764
|
+
* { "role": "user", "content": "Click the submit button" }
|
|
1765
|
+
* ],
|
|
1766
|
+
* "tools": [
|
|
1767
|
+
* {
|
|
1768
|
+
* "name": "dom",
|
|
1769
|
+
* "description": "DOM interaction tool. Actions: click, fill, ...",
|
|
1770
|
+
* "input_schema": { "type": "object", "properties": { "action": { ... }, "selector": { ... } } }
|
|
1771
|
+
* }
|
|
1772
|
+
* ],
|
|
1773
|
+
* "stream": true
|
|
1774
|
+
* }
|
|
1775
|
+
* ```
|
|
1776
|
+
*
|
|
1777
|
+
* @param config - AI 客户端配置
|
|
1778
|
+
* @param params - 统一聊天参数(systemPrompt / messages / tools)
|
|
1779
|
+
* @returns 构建好的 HTTP 请求对象(url / method / headers / body)
|
|
1501
1780
|
*/
|
|
1502
1781
|
function buildAnthropicRequest(config, params) {
|
|
1503
1782
|
const baseURL = resolveBaseURL(config);
|
|
@@ -1528,7 +1807,38 @@ function buildAnthropicRequest(config, params) {
|
|
|
1528
1807
|
};
|
|
1529
1808
|
}
|
|
1530
1809
|
/**
|
|
1531
|
-
* 解析 Anthropic 响应(中)/ Parse raw Anthropic response (EN).
|
|
1810
|
+
* 解析 Anthropic JSON 响应(中)/ Parse raw Anthropic JSON response into unified AIChatResponse (EN).
|
|
1811
|
+
*
|
|
1812
|
+
* 从 content 数组中提取所有 text 块(合并为字符串)和 tool_use 块(转为 AIToolCall),
|
|
1813
|
+
* 并映射 usage 字段为统一的 inputTokens / outputTokens。
|
|
1814
|
+
*
|
|
1815
|
+
* 输入示例(非流式 JSON 响应):
|
|
1816
|
+
* ```json
|
|
1817
|
+
* {
|
|
1818
|
+
* "content": [
|
|
1819
|
+
* { "type": "text", "text": "好的,我来点击提交按钮。" },
|
|
1820
|
+
* {
|
|
1821
|
+
* "type": "tool_use",
|
|
1822
|
+
* "id": "toolu_01A09q90qw90lq917835lhds",
|
|
1823
|
+
* "name": "dom",
|
|
1824
|
+
* "input": { "action": "click", "selector": "#submit-btn" }
|
|
1825
|
+
* }
|
|
1826
|
+
* ],
|
|
1827
|
+
* "usage": { "input_tokens": 1500, "output_tokens": 62 }
|
|
1828
|
+
* }
|
|
1829
|
+
* ```
|
|
1830
|
+
*
|
|
1831
|
+
* 输出(统一 AIChatResponse):
|
|
1832
|
+
* ```json
|
|
1833
|
+
* {
|
|
1834
|
+
* "text": "好的,我来点击提交按钮。",
|
|
1835
|
+
* "toolCalls": [{ "id": "toolu_01A09q90qw90lq917835lhds", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
|
|
1836
|
+
* "usage": { "inputTokens": 1500, "outputTokens": 62 }
|
|
1837
|
+
* }
|
|
1838
|
+
* ```
|
|
1839
|
+
*
|
|
1840
|
+
* @param data - Anthropic API 返回的原始 JSON 对象
|
|
1841
|
+
* @returns 统一的 AIChatResponse
|
|
1532
1842
|
*/
|
|
1533
1843
|
function parseAnthropicResponse(data) {
|
|
1534
1844
|
const d = data;
|
|
@@ -1548,7 +1858,43 @@ function parseAnthropicResponse(data) {
|
|
|
1548
1858
|
};
|
|
1549
1859
|
}
|
|
1550
1860
|
/**
|
|
1551
|
-
*
|
|
1861
|
+
* 统一消息格式转换为 Anthropic 格式(中)/ Convert unified AIMessage[] to Anthropic message format (EN).
|
|
1862
|
+
*
|
|
1863
|
+
* 转换规则:
|
|
1864
|
+
* - 过滤掉 `system` 角色消息(system prompt 已由顶层 `system` 字段处理)
|
|
1865
|
+
* - `tool` 角色 → Anthropic `user` 角色 + `tool_result` content blocks
|
|
1866
|
+
* - `assistant` 带 toolCalls → text block + tool_use blocks
|
|
1867
|
+
* - 其他 → 直接映射 role + content
|
|
1868
|
+
*
|
|
1869
|
+
* 输入示例(统一 AIMessage[]):
|
|
1870
|
+
* ```ts
|
|
1871
|
+
* messages = [
|
|
1872
|
+
* { role: "user", content: "帮我点击提交按钮" },
|
|
1873
|
+
* { role: "assistant", content: "好的", toolCalls: [
|
|
1874
|
+
* { id: "toolu_01A", name: "dom", input: { action: "click", selector: "#btn" } }
|
|
1875
|
+
* ]},
|
|
1876
|
+
* { role: "tool", content: [
|
|
1877
|
+
* { toolCallId: "toolu_01A", result: "点击成功" }
|
|
1878
|
+
* ]}
|
|
1879
|
+
* ]
|
|
1880
|
+
* ```
|
|
1881
|
+
*
|
|
1882
|
+
* 输出示例(Anthropic 格式):
|
|
1883
|
+
* ```json
|
|
1884
|
+
* [
|
|
1885
|
+
* { "role": "user", "content": "帮我点击提交按钮" },
|
|
1886
|
+
* { "role": "assistant", "content": [
|
|
1887
|
+
* { "type": "text", "text": "好的" },
|
|
1888
|
+
* { "type": "tool_use", "id": "toolu_01A", "name": "dom", "input": { "action": "click", "selector": "#btn" } }
|
|
1889
|
+
* ]},
|
|
1890
|
+
* { "role": "user", "content": [
|
|
1891
|
+
* { "type": "tool_result", "tool_use_id": "toolu_01A", "content": "点击成功" }
|
|
1892
|
+
* ]}
|
|
1893
|
+
* ]
|
|
1894
|
+
* ```
|
|
1895
|
+
*
|
|
1896
|
+
* @param messages - 统一消息列表
|
|
1897
|
+
* @returns Anthropic 格式的消息数组
|
|
1552
1898
|
*/
|
|
1553
1899
|
function convertMessages(messages) {
|
|
1554
1900
|
return messages.filter((m) => m.role !== "system").map((m) => {
|
|
@@ -1584,7 +1930,65 @@ function convertMessages(messages) {
|
|
|
1584
1930
|
});
|
|
1585
1931
|
}
|
|
1586
1932
|
/**
|
|
1587
|
-
* 解析 Anthropic SSE
|
|
1933
|
+
* 解析 Anthropic SSE 流式响应(中)/ Parse Anthropic SSE stream into unified AIChatResponse (EN).
|
|
1934
|
+
*
|
|
1935
|
+
* 事件处理流程:
|
|
1936
|
+
* - `message_start` → 提取 input_tokens
|
|
1937
|
+
* - `content_block_start` → 识别 tool_use 块,开始累积工具参数 JSON
|
|
1938
|
+
* - `content_block_delta` → 累积文本(text_delta)或工具参数片段(input_json_delta)
|
|
1939
|
+
* - `content_block_stop` → 完成当前工具调用,解析参数 JSON
|
|
1940
|
+
* - `message_delta` → 提取 output_tokens
|
|
1941
|
+
*
|
|
1942
|
+
* SSE 流示例(含工具调用):
|
|
1943
|
+
* ```
|
|
1944
|
+
* event: message_start
|
|
1945
|
+
* data: {"type":"message_start","message":{"usage":{"input_tokens":1500}}}
|
|
1946
|
+
*
|
|
1947
|
+
* event: content_block_start
|
|
1948
|
+
* data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
|
|
1949
|
+
*
|
|
1950
|
+
* event: content_block_delta
|
|
1951
|
+
* data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好的,我来点"}}
|
|
1952
|
+
*
|
|
1953
|
+
* event: content_block_delta
|
|
1954
|
+
* data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"击提交按钮。"}}
|
|
1955
|
+
*
|
|
1956
|
+
* event: content_block_stop
|
|
1957
|
+
* data: {"type":"content_block_stop","index":0}
|
|
1958
|
+
*
|
|
1959
|
+
* event: content_block_start
|
|
1960
|
+
* data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01A09q","name":"dom"}}
|
|
1961
|
+
*
|
|
1962
|
+
* event: content_block_delta
|
|
1963
|
+
* data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"action\":"}}
|
|
1964
|
+
*
|
|
1965
|
+
* event: content_block_delta
|
|
1966
|
+
* data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"click\",\"selector\":\"#submit-btn\"}"}}
|
|
1967
|
+
*
|
|
1968
|
+
* event: content_block_stop
|
|
1969
|
+
* data: {"type":"content_block_stop","index":1}
|
|
1970
|
+
*
|
|
1971
|
+
* event: message_delta
|
|
1972
|
+
* data: {"type":"message_delta","usage":{"output_tokens":62}}
|
|
1973
|
+
*
|
|
1974
|
+
* event: message_stop
|
|
1975
|
+
* data: {"type":"message_stop"}
|
|
1976
|
+
* ```
|
|
1977
|
+
*
|
|
1978
|
+
* 最终输出(统一 AIChatResponse):
|
|
1979
|
+
* ```json
|
|
1980
|
+
* {
|
|
1981
|
+
* "text": "好的,我来点击提交按钮。",
|
|
1982
|
+
* "toolCalls": [{ "id": "toolu_01A09q", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
|
|
1983
|
+
* "usage": { "inputTokens": 1500, "outputTokens": 62 }
|
|
1984
|
+
* }
|
|
1985
|
+
* ```
|
|
1986
|
+
*
|
|
1987
|
+
* 注意:Anthropic SSE 不发送 `[DONE]`,因此 `stopOnDone` 设为 false,
|
|
1988
|
+
* 依赖流关闭来结束消费。
|
|
1989
|
+
*
|
|
1990
|
+
* @param response - Anthropic API 的流式 HTTP 响应
|
|
1991
|
+
* @returns 统一的 AIChatResponse(文本 + 工具调用 + usage)
|
|
1588
1992
|
*/
|
|
1589
1993
|
async function parseAnthropicStream(response) {
|
|
1590
1994
|
if (!response.body) return parseAnthropicResponse(await response.json());
|
|
@@ -1641,7 +2045,7 @@ async function parseAnthropicStream(response) {
|
|
|
1641
2045
|
}
|
|
1642
2046
|
|
|
1643
2047
|
//#endregion
|
|
1644
|
-
//#region src/core/ai-client/deepseek.ts
|
|
2048
|
+
//#region src/core/ai-client/models/deepseek.ts
|
|
1645
2049
|
/**
|
|
1646
2050
|
* DeepSeek 客户端封装(中)/ DeepSeek client wrapper (EN).
|
|
1647
2051
|
*
|
|
@@ -1654,7 +2058,7 @@ async function parseAnthropicStream(response) {
|
|
|
1654
2058
|
var DeepSeekClient = class extends OpenAIClient {};
|
|
1655
2059
|
|
|
1656
2060
|
//#endregion
|
|
1657
|
-
//#region src/core/ai-client/doubao.ts
|
|
2061
|
+
//#region src/core/ai-client/models/doubao.ts
|
|
1658
2062
|
/**
|
|
1659
2063
|
* Doubao 客户端封装(中)/ Doubao client wrapper (EN).
|
|
1660
2064
|
*
|
|
@@ -1667,7 +2071,7 @@ var DeepSeekClient = class extends OpenAIClient {};
|
|
|
1667
2071
|
var DoubaoClient = class extends OpenAIClient {};
|
|
1668
2072
|
|
|
1669
2073
|
//#endregion
|
|
1670
|
-
//#region src/core/ai-client/qwen.ts
|
|
2074
|
+
//#region src/core/ai-client/models/qwen.ts
|
|
1671
2075
|
/**
|
|
1672
2076
|
* Qwen 客户端封装(中)/ Qwen client wrapper (EN).
|
|
1673
2077
|
*
|
|
@@ -1680,7 +2084,7 @@ var DoubaoClient = class extends OpenAIClient {};
|
|
|
1680
2084
|
var QwenClient = class extends OpenAIClient {};
|
|
1681
2085
|
|
|
1682
2086
|
//#endregion
|
|
1683
|
-
//#region src/core/ai-client/minimax.ts
|
|
2087
|
+
//#region src/core/ai-client/models/minimax.ts
|
|
1684
2088
|
/**
|
|
1685
2089
|
* MiniMax 客户端封装(中)/ MiniMax client wrapper (EN).
|
|
1686
2090
|
*
|
|
@@ -1877,7 +2281,7 @@ function buildSystemPrompt(params = {}) {
|
|
|
1877
2281
|
"- listeners=\"...\" = bound event handlers (abbrevs below). Prefer targets with matching listeners.",
|
|
1878
2282
|
"- Click target MUST have click signal: listeners containing clk/pdn/mdn, or onclick attr, or native <a>/<button>, or role=button/link. NEVER click elements with only blr/fcs (focus/blur) — they are not click targets.",
|
|
1879
2283
|
"- If the text you want to click has no click signal, look at its parent row/container or nearby sibling that does have clk listener.",
|
|
1880
|
-
"- No-effect fallback: try
|
|
2284
|
+
"- No-effect fallback: if a click produced no page change (snapshot unchanged), do NOT repeat the same target. Instead: (1) look for <a> links or <button> inside the clicked container; (2) try a parent or sibling with stronger click signal; (3) try a completely different approach (e.g., search, filter, sidebar navigation, or use evaluate to trigger the action programmatically).",
|
|
1881
2285
|
"- Batch fill/type/check/select_option freely within one round. A click always ends the round — send at most ONE click as the LAST action in a batch.",
|
|
1882
2286
|
"- Input order (MANDATORY): focus/click → fill/type/select_option per target. Multi-field: focus A→fill A→focus B→fill B.",
|
|
1883
2287
|
"- Search/filter inputs: after fill, press Enter (or click search button) to trigger the search. Do not assume fill alone submits.",
|
|
@@ -1885,7 +2289,7 @@ function buildSystemPrompt(params = {}) {
|
|
|
1885
2289
|
"- Steppers: compute delta from visible value, click exactly |delta| times. Check/uncheck: target real input control.",
|
|
1886
2290
|
"- DOM-changing action (click/modal/navigate): ends the round, next snapshot follows. Actions sent after a click in the same batch are discarded.",
|
|
1887
2291
|
"- Intermediate progress is NOT completion: if an action only opens, expands, reveals, filters, paginates, switches context, or loads the next step, keep REMAINING on the final user goal until the requested end state/value/content is visible in the snapshot.",
|
|
1888
|
-
"- Effect check: before planning new actions, confirm previous actions' expected effects are visible in current snapshot. If
|
|
2292
|
+
"- Effect check: before planning new actions, confirm previous actions' expected effects are visible in current snapshot. If the snapshot is unchanged after a click, the click FAILED — you MUST pick a different element (e.g., an <a> or <button> child inside the row, or the link text itself).",
|
|
1889
2293
|
"- Do NOT call page_info — snapshot is auto-refreshed and provided every round. Do NOT use get_text/get_attr to read what is already visible in the snapshot.",
|
|
1890
2294
|
"- Never repeat the same tool call (same name + same args) on the same target. If it didn't work, try a different approach.",
|
|
1891
2295
|
"- Dropdown/select: prefer dom.select_option (works in one round). For custom dropdowns requiring click-to-open: click → wait for next snapshot → click option (two rounds).",
|