agentpage 0.0.48 → 0.0.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -999,7 +999,7 @@ async function executeAgentLoop(params) {
999
999
  "- You performed the EXACT same tool call(s) as the previous round, but NO visible change occurred in the snapshot.",
1000
1000
  "The clicked element did not trigger navigation or DOM change. This round you MUST do ONE of:",
1001
1001
  "1) Look INSIDE the clicked container for an <a> link, <button>, or child element with clk/pdn/mdn listener, and click THAT instead;",
1002
- "2) If there is a visible href/URL, use navigate.goto to go there directly;",
1002
+ "2) Try a parent or sibling element with stronger click signal (clk/pdn/mdn listener);",
1003
1003
  "3) Try a completely different approach (e.g., search, filter, or navigate via sidebar);",
1004
1004
  "4) If the task is truly complete, return REMAINING: DONE with no tool calls.",
1005
1005
  "Do NOT repeat the same action again."
@@ -1087,16 +1087,6 @@ async function executeAgentLoop(params) {
1087
1087
  if (finalReply) callbacks?.onText?.(finalReply);
1088
1088
  break;
1089
1089
  }
1090
- if (consecutiveNoProtocolRounds >= 5) {
1091
- finalReply = response.text?.trim() || "任务已完成。";
1092
- if (finalReply) callbacks?.onText?.(finalReply);
1093
- break;
1094
- }
1095
- if (consecutiveNoProtocolRounds >= 3) protocolViolationHint = [
1096
- "Protocol reminder: REMAINING protocol missing for 3+ rounds with tool calls.",
1097
- "You MUST include REMAINING: <text> or REMAINING: DONE in every response.",
1098
- "If the task is fully complete, return REMAINING: DONE with no tool calls."
1099
- ].join("\n");
1100
1090
  const idleResult = detectIdleLoop(executedTaskCalls, consecutiveReadOnlyRounds);
1101
1091
  if (idleResult === -1) {
1102
1092
  finalReply = response.text?.trim() || "任务已完成。";
@@ -1107,15 +1097,29 @@ async function executeAgentLoop(params) {
1107
1097
  if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
1108
1098
  await refreshSnapshot();
1109
1099
  if (roundHasPotentialDomMutation) {
1110
- if (computeSnapshotFingerprint(pageContext.latestSnapshot || "") === roundStartFingerprint && roundStartFingerprint !== "") {
1100
+ const roundEndFingerprint = computeSnapshotFingerprint(pageContext.latestSnapshot || "");
1101
+ if (roundEndFingerprint === roundStartFingerprint && roundStartFingerprint !== "") {
1111
1102
  const unchangedHint = [
1112
1103
  "Snapshot unchanged after action:",
1113
1104
  "- The page snapshot is IDENTICAL before and after your action(s) this round.",
1114
1105
  "- Your click/action had NO visible effect on the page. Do NOT repeat it.",
1115
- "- Look INSIDE the target for <a>/<button>/child with clk listener, or use navigate.goto if href is visible."
1106
+ "- Look INSIDE the target for <a>/<button>/child with clk listener, or try a parent/sibling with stronger signal, or use a completely different approach."
1116
1107
  ].join("\n");
1117
1108
  protocolViolationHint = protocolViolationHint ? protocolViolationHint + "\n\n" + unchangedHint : unchangedHint;
1118
- }
1109
+ } else if (roundEndFingerprint !== roundStartFingerprint) consecutiveNoProtocolRounds = 0;
1110
+ }
1111
+ if (consecutiveNoProtocolRounds >= 5) {
1112
+ finalReply = response.text?.trim() || "任务已完成。";
1113
+ if (finalReply) callbacks?.onText?.(finalReply);
1114
+ break;
1115
+ }
1116
+ if (consecutiveNoProtocolRounds >= 3) {
1117
+ const noProtocolHint = [
1118
+ "Protocol reminder: REMAINING protocol missing for 3+ rounds with tool calls.",
1119
+ "You MUST include REMAINING: <text> or REMAINING: DONE in every response.",
1120
+ "If the task is fully complete, return REMAINING: DONE with no tool calls."
1121
+ ].join("\n");
1122
+ protocolViolationHint = protocolViolationHint ? protocolViolationHint + "\n\n" + noProtocolHint : noProtocolHint;
1119
1123
  }
1120
1124
  }
1121
1125
  const resultMessages = [...history ?? [], {
@@ -1289,12 +1293,33 @@ var BaseAIClient = class {
1289
1293
  };
1290
1294
 
1291
1295
  //#endregion
1292
- //#region src/core/ai-client/openai.ts
1296
+ //#region src/core/ai-client/models/openai.ts
1297
+ /** 单次请求默认超时时间(毫秒) */
1293
1298
  const DEFAULT_REQUEST_TIMEOUT_MS = 45e3;
1299
+ /** JSON(非流式)模式超时后的最大重试次数 */
1294
1300
  const JSON_TIMEOUT_RETRY_COUNT = 1;
1301
+ /**
1302
+ * 判断错误是否为请求超时错误(中)/ Check if error is a request timeout (EN).
1303
+ * 仅匹配 `fetchWithTimeout` 抛出的特定格式错误消息。
1304
+ */
1295
1305
  function isRequestTimeoutError(error) {
1296
1306
  return error instanceof Error && /^AI request timeout \(\d+ms\)$/.test(error.message);
1297
1307
  }
1308
+ /**
1309
+ * 带超时的 fetch 封装(中)/ Fetch wrapper with AbortController-based timeout (EN).
1310
+ *
1311
+ * 工作原理:
1312
+ * 1. 创建 AbortController,设置 setTimeout 在超时后调用 controller.abort()
1313
+ * 2. 将 controller.signal 注入 fetch 请求
1314
+ * 3. 若 fetch 被 abort,捕获 AbortError 并转换为语义明确的超时错误
1315
+ * 4. 无论成功或失败,finally 中清除定时器避免泄漏
1316
+ *
1317
+ * @param input - 请求 URL 或 Request 对象
1318
+ * @param init - fetch 请求配置
1319
+ * @param timeoutMs - 超时毫秒数(<=0 或非有限数时不设超时)
1320
+ * @returns fetch Response
1321
+ * @throws Error - 超时时抛出 "AI request timeout (Xms)"
1322
+ */
1298
1323
  async function fetchWithTimeout(input, init, timeoutMs) {
1299
1324
  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return fetch(input, init);
1300
1325
  const controller = new AbortController();
@@ -1312,11 +1337,30 @@ async function fetchWithTimeout(input, init, timeoutMs) {
1312
1337
  }
1313
1338
  }
1314
1339
  /**
1315
- * OpenAIClient 类(中)/ OpenAIClient class for OpenAI & Copilot (EN).
1340
+ * OpenAIClient 类(中)/ OpenAI & Copilot client implementation (EN).
1341
+ *
1342
+ * 继承 BaseAIClient,通过注入 chatHandler 实现 OpenAI Chat Completions 协议。
1343
+ * 根据 `config.stream`(默认 true)自动选择流式(SSE)或非流式(JSON)模式。
1344
+ *
1345
+ * 非流式模式特性:
1346
+ * - 使用 `fetchWithTimeout` + AbortController 实现请求级超时
1347
+ * - 超时后自动重试 1 次(`JSON_TIMEOUT_RETRY_COUNT`),应对单次网络抖动
1348
+ * - 非超时错误不重试,直接抛出
1349
+ *
1350
+ * 流式模式特性:
1351
+ * - 通过 SSE 逐 chunk 接收 delta 内容和工具调用片段
1352
+ * - 若服务端返回 application/json(如模型降级),自动回退为 JSON 解析
1353
+ * - 单次 chunk 读取超时 20s(`readTimeoutMs`)
1354
+ *
1355
+ * 本类也是 DeepSeek / Doubao / Qwen / MiniMax 等兼容 provider 的基类。
1316
1356
  */
1317
1357
  var OpenAIClient = class extends BaseAIClient {
1318
1358
  /** AI 客户端配置(provider / model / apiKey / baseURL) */
1319
1359
  config;
1360
+ /**
1361
+ * 构造 OpenAIClient 实例。
1362
+ * @param config - AI 客户端配置,需包含 provider / model / apiKey
1363
+ */
1320
1364
  constructor(config) {
1321
1365
  super({ chatHandler: async (params) => {
1322
1366
  const req = buildOpenAIRequest(this.config, params);
@@ -1357,7 +1401,48 @@ var OpenAIClient = class extends BaseAIClient {
1357
1401
  }
1358
1402
  };
1359
1403
  /**
1360
- * 构建 OpenAI 请求(中)/ Build OpenAI chat request payload (EN).
1404
+ * 构建 OpenAI Chat Completions 请求(中)/ Build OpenAI chat request payload (EN).
1405
+ *
1406
+ * 将统一的 ChatParams 转换为 OpenAI 协议格式:
1407
+ * - system prompt 作为 messages 数组首条 system 消息
1408
+ * - 工具定义转换为 `{ type: "function", function: { name, description, parameters } }` 格式
1409
+ * - 流式模式设置 `stream: true` + `stream_options: { include_usage: true }` 获取用量
1410
+ * - 有工具时设置 `tool_choice: "auto"` + `parallel_tool_calls`(默认允许并行)
1411
+ * - temperature 固定 0.3(偏确定性,适合工具调用场景)
1412
+ * - 认证使用 `Authorization: Bearer <apiKey>`
1413
+ *
1414
+ * 构建后的请求体示例(流式 + 含工具):
1415
+ * ```json
1416
+ * // POST https://api.openai.com/v1/chat/completions
1417
+ * // Headers: { "Authorization": "Bearer sk-xxx", "Content-Type": "application/json" }
1418
+ * {
1419
+ * "model": "gpt-4o",
1420
+ * "messages": [
1421
+ * { "role": "system", "content": "You are a browser automation agent..." },
1422
+ * { "role": "user", "content": "Click the submit button" }
1423
+ * ],
1424
+ * "tools": [
1425
+ * {
1426
+ * "type": "function",
1427
+ * "function": {
1428
+ * "name": "dom",
1429
+ * "description": "DOM interaction tool. Actions: click, fill, ...",
1430
+ * "parameters": { "type": "object", "properties": { "action": { ... }, "selector": { ... } } }
1431
+ * }
1432
+ * }
1433
+ * ],
1434
+ * "tool_choice": "auto",
1435
+ * "parallel_tool_calls": true,
1436
+ * "temperature": 0.3,
1437
+ * "max_tokens": 4096,
1438
+ * "stream": true,
1439
+ * "stream_options": { "include_usage": true }
1440
+ * }
1441
+ * ```
1442
+ *
1443
+ * @param config - AI 客户端配置
1444
+ * @param params - 统一聊天参数(systemPrompt / messages / tools)
1445
+ * @returns 构建好的 HTTP 请求对象(url / method / headers / body)
1361
1446
  */
1362
1447
  function buildOpenAIRequest(config, params) {
1363
1448
  const baseURL = resolveBaseURL(config);
@@ -1397,7 +1482,43 @@ function buildOpenAIRequest(config, params) {
1397
1482
  };
1398
1483
  }
1399
1484
  /**
1400
- * 解析 OpenAI 响应(中)/ Parse raw OpenAI response into AIChatResponse (EN).
1485
+ * 解析 OpenAI JSON 响应(中)/ Parse raw OpenAI JSON response into unified AIChatResponse (EN).
1486
+ *
1487
+ * 从 choices[0].message 中提取文本和工具调用,并映射 usage 统计。
1488
+ * 工具调用的 `arguments` 字段为 JSON 字符串,此处解析为对象。
1489
+ *
1490
+ * 输入示例(非流式 JSON 响应):
1491
+ * ```json
1492
+ * {
1493
+ * "choices": [{
1494
+ * "message": {
1495
+ * "content": null,
1496
+ * "tool_calls": [{
1497
+ * "id": "call_abc123",
1498
+ * "type": "function",
1499
+ * "function": {
1500
+ * "name": "dom",
1501
+ * "arguments": "{\"action\":\"click\",\"selector\":\"#submit-btn\"}"
1502
+ * }
1503
+ * }]
1504
+ * }
1505
+ * }],
1506
+ * "usage": { "prompt_tokens": 1200, "completion_tokens": 45 }
1507
+ * }
1508
+ * ```
1509
+ *
1510
+ * 输出(统一 AIChatResponse):
1511
+ * ```json
1512
+ * {
1513
+ * "text": undefined,
1514
+ * "toolCalls": [{ "id": "call_abc123", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
1515
+ * "usage": { "inputTokens": 1200, "outputTokens": 45 }
1516
+ * }
1517
+ * ```
1518
+ *
1519
+ * @param data - OpenAI API 返回的原始 JSON 对象
1520
+ * @returns 统一的 AIChatResponse
1521
+ * @throws Error - choices 为空时抛出 "AI 未返回有效响应"
1401
1522
  */
1402
1523
  function parseOpenAIResponse(data) {
1403
1524
  const d = data;
@@ -1419,7 +1540,42 @@ function parseOpenAIResponse(data) {
1419
1540
  };
1420
1541
  }
1421
1542
  /**
1422
- * 消息转换(中)/ Convert unified messages to OpenAI format (EN).
1543
+ * 统一消息格式转换为 OpenAI 格式(中)/ Convert unified AIMessage[] to OpenAI message format (EN).
1544
+ *
1545
+ * 转换规则:
1546
+ * - system prompt → 首条 `{ role: "system", content }` 消息
1547
+ * - `tool` 角色 → 展开为多条 `{ role: "tool", tool_call_id, content }` 消息(OpenAI 要求每个结果独立一条)
1548
+ * - `assistant` 带 toolCalls → `{ role: "assistant", content, tool_calls }` 消息
1549
+ * - 其他(user / assistant 纯文本)→ 直接映射
1550
+ *
1551
+ * 输入示例(统一 AIMessage[]):
1552
+ * ```ts
1553
+ * messages = [
1554
+ * { role: "user", content: "帮我点击提交按钮" },
1555
+ * { role: "assistant", content: "好的", toolCalls: [
1556
+ * { id: "call_abc", name: "dom", input: { action: "click", selector: "#btn" } }
1557
+ * ]},
1558
+ * { role: "tool", content: [
1559
+ * { toolCallId: "call_abc", result: "点击成功" }
1560
+ * ]}
1561
+ * ]
1562
+ * ```
1563
+ *
1564
+ * 输出示例(OpenAI 格式):
1565
+ * ```json
1566
+ * [
1567
+ * { "role": "system", "content": "You are a browser automation agent..." },
1568
+ * { "role": "user", "content": "帮我点击提交按钮" },
1569
+ * { "role": "assistant", "content": "好的", "tool_calls": [
1570
+ * { "id": "call_abc", "type": "function", "function": { "name": "dom", "arguments": "{\"action\":\"click\",\"selector\":\"#btn\"}" } }
1571
+ * ]},
1572
+ * { "role": "tool", "tool_call_id": "call_abc", "content": "点击成功" }
1573
+ * ]
1574
+ * ```
1575
+ *
1576
+ * @param systemPrompt - 系统提示词
1577
+ * @param messages - 统一消息列表
1578
+ * @returns OpenAI 格式的消息数组
1423
1579
  */
1424
1580
  function convertMessages$1(systemPrompt, messages) {
1425
1581
  const result = [{
@@ -1450,7 +1606,52 @@ function convertMessages$1(systemPrompt, messages) {
1450
1606
  return result;
1451
1607
  }
1452
1608
  /**
1453
- * 解析 OpenAI SSE(中)/ Parse OpenAI SSE stream into unified response (EN).
1609
+ * 解析 OpenAI SSE 流式响应(中)/ Parse OpenAI SSE stream into unified AIChatResponse (EN).
1610
+ *
1611
+ * 工作原理:
1612
+ * 1. 通过 `consumeSSEJSON` 逐 chunk 消费 SSE 事件
1613
+ * 2. 文本内容(`delta.content`)逐 chunk 拼接为完整字符串
1614
+ * 3. 工具调用按 `delta.tool_calls[].index` 累积:
1615
+ * - 首个 delta 包含 id 和 name
1616
+ * - 后续 delta 只包含 arguments 片段,需要拼接
1617
+ * - 最终按 index 排序,逐个 JSON.parse 解析 arguments
1618
+ * 4. usage 信息来自最终 chunk(需 `stream_options.include_usage=true`)
1619
+ * 5. 遇到 `[DONE]` 信号自动结束(`stopOnDone: true`)
1620
+ *
1621
+ * SSE 流示例(含工具调用):
1622
+ * ```
1623
+ * data: {"choices":[{"delta":{"tool_calls":[{"index":0,"id":"call_abc","function":{"name":"dom","arguments":""}}]}}]}
1624
+ * data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"action"}}]}}]}
1625
+ * data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\"click\",\"selector"}}]}}]}
1626
+ * data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\"#submit-btn\"}"}}]}}]}
1627
+ * data: {"choices":[{"delta":{}}],"usage":{"prompt_tokens":1200,"completion_tokens":45}}
1628
+ * data: [DONE]
1629
+ * ```
1630
+ *
1631
+ * SSE 流示例(纯文本):
1632
+ * ```
1633
+ * data: {"choices":[{"delta":{"content":"我"}}]}
1634
+ * data: {"choices":[{"delta":{"content":"已经"}}]}
1635
+ * data: {"choices":[{"delta":{"content":"完成了"}}]}
1636
+ * data: {"choices":[{"delta":{"content":"任务。"}}]}
1637
+ * data: {"choices":[{"delta":{}}],"usage":{"prompt_tokens":800,"completion_tokens":12}}
1638
+ * data: [DONE]
1639
+ * ```
1640
+ *
1641
+ * 最终输出(统一 AIChatResponse):
1642
+ * ```json
1643
+ * {
1644
+ * "text": "我已经完成了任务。",
1645
+ * "toolCalls": undefined,
1646
+ * "usage": { "inputTokens": 800, "outputTokens": 12 }
1647
+ * }
1648
+ * ```
1649
+ *
1650
+ * 回退:若 response.body 不可用(无 ReadableStream 支持),回退为 JSON 解析。
1651
+ *
1652
+ * @param response - OpenAI API 的流式 HTTP 响应
1653
+ * @param readTimeoutMs - 单次 chunk 读取超时(毫秒,默认 20000)
1654
+ * @returns 统一的 AIChatResponse(文本 + 工具调用 + usage)
1454
1655
  */
1455
1656
  async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
1456
1657
  if (!response.body) return parseOpenAIResponse(await response.json());
@@ -1496,13 +1697,22 @@ async function parseOpenAIStream(response, readTimeoutMs = 2e4) {
1496
1697
  }
1497
1698
 
1498
1699
  //#endregion
1499
- //#region src/core/ai-client/anthropic.ts
1700
+ //#region src/core/ai-client/models/anthropic.ts
1500
1701
  /**
1501
1702
  * AnthropicClient 类(中)/ AnthropicClient class (EN).
1703
+ *
1704
+ * 继承 BaseAIClient,通过注入 chatHandler 实现 Anthropic Messages API 的请求与响应处理。
1705
+ * 根据 `config.stream`(默认 true)自动选择 SSE 流式 或 JSON 非流式模式。
1706
+ *
1707
+ * 流式模式下,若服务端返回 `application/json`(如模型降级),自动回退为 JSON 解析。
1502
1708
  */
1503
1709
  var AnthropicClient = class extends BaseAIClient {
1504
1710
  /** AI 客户端配置(provider / model / apiKey / baseURL) */
1505
1711
  config;
1712
+ /**
1713
+ * 构造 AnthropicClient 实例。
1714
+ * @param config - AI 客户端配置,需包含 provider="anthropic"、model、apiKey
1715
+ */
1506
1716
  constructor(config) {
1507
1717
  super({ chatHandler: async (params) => {
1508
1718
  const req = buildAnthropicRequest(this.config, params);
@@ -1534,7 +1744,39 @@ var AnthropicClient = class extends BaseAIClient {
1534
1744
  }
1535
1745
  };
1536
1746
  /**
1537
- * 构建 Anthropic 请求(中)/ Build Anthropic Messages API request (EN).
1747
+ * 构建 Anthropic Messages API 请求(中)/ Build Anthropic Messages API request payload (EN).
1748
+ *
1749
+ * 将统一的 ChatParams 转换为 Anthropic 协议格式:
1750
+ * - system prompt 放顶层 `system` 字段(非消息数组)
1751
+ * - 工具定义使用 `input_schema`(非 `parameters`)
1752
+ * - max_tokens 根据模型名自动调整(opus 系列 16384,其他 8192)
1753
+ * - 认证头使用 `x-api-key` + `anthropic-version`
1754
+ *
1755
+ * 构建后的请求体示例(流式 + 含工具):
1756
+ * ```json
1757
+ * // POST https://api.anthropic.com/v1/messages
1758
+ * // Headers: { "x-api-key": "sk-ant-xxx", "anthropic-version": "2023-06-01", "Content-Type": "application/json" }
1759
+ * {
1760
+ * "model": "claude-sonnet-4-20250514",
1761
+ * "max_tokens": 8192,
1762
+ * "system": "You are a browser automation agent...",
1763
+ * "messages": [
1764
+ * { "role": "user", "content": "Click the submit button" }
1765
+ * ],
1766
+ * "tools": [
1767
+ * {
1768
+ * "name": "dom",
1769
+ * "description": "DOM interaction tool. Actions: click, fill, ...",
1770
+ * "input_schema": { "type": "object", "properties": { "action": { ... }, "selector": { ... } } }
1771
+ * }
1772
+ * ],
1773
+ * "stream": true
1774
+ * }
1775
+ * ```
1776
+ *
1777
+ * @param config - AI 客户端配置
1778
+ * @param params - 统一聊天参数(systemPrompt / messages / tools)
1779
+ * @returns 构建好的 HTTP 请求对象(url / method / headers / body)
1538
1780
  */
1539
1781
  function buildAnthropicRequest(config, params) {
1540
1782
  const baseURL = resolveBaseURL(config);
@@ -1565,7 +1807,38 @@ function buildAnthropicRequest(config, params) {
1565
1807
  };
1566
1808
  }
1567
1809
  /**
1568
- * 解析 Anthropic 响应(中)/ Parse raw Anthropic response (EN).
1810
+ * 解析 Anthropic JSON 响应(中)/ Parse raw Anthropic JSON response into unified AIChatResponse (EN).
1811
+ *
1812
+ * 从 content 数组中提取所有 text 块(合并为字符串)和 tool_use 块(转为 AIToolCall),
1813
+ * 并映射 usage 字段为统一的 inputTokens / outputTokens。
1814
+ *
1815
+ * 输入示例(非流式 JSON 响应):
1816
+ * ```json
1817
+ * {
1818
+ * "content": [
1819
+ * { "type": "text", "text": "好的,我来点击提交按钮。" },
1820
+ * {
1821
+ * "type": "tool_use",
1822
+ * "id": "toolu_01A09q90qw90lq917835lhds",
1823
+ * "name": "dom",
1824
+ * "input": { "action": "click", "selector": "#submit-btn" }
1825
+ * }
1826
+ * ],
1827
+ * "usage": { "input_tokens": 1500, "output_tokens": 62 }
1828
+ * }
1829
+ * ```
1830
+ *
1831
+ * 输出(统一 AIChatResponse):
1832
+ * ```json
1833
+ * {
1834
+ * "text": "好的,我来点击提交按钮。",
1835
+ * "toolCalls": [{ "id": "toolu_01A09q90qw90lq917835lhds", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
1836
+ * "usage": { "inputTokens": 1500, "outputTokens": 62 }
1837
+ * }
1838
+ * ```
1839
+ *
1840
+ * @param data - Anthropic API 返回的原始 JSON 对象
1841
+ * @returns 统一的 AIChatResponse
1569
1842
  */
1570
1843
  function parseAnthropicResponse(data) {
1571
1844
  const d = data;
@@ -1585,7 +1858,43 @@ function parseAnthropicResponse(data) {
1585
1858
  };
1586
1859
  }
1587
1860
  /**
1588
- * 消息格式转换(中)/ Convert unified messages to Anthropic format (EN).
1861
+ * 统一消息格式转换为 Anthropic 格式(中)/ Convert unified AIMessage[] to Anthropic message format (EN).
1862
+ *
1863
+ * 转换规则:
1864
+ * - 过滤掉 `system` 角色消息(system prompt 已由顶层 `system` 字段处理)
1865
+ * - `tool` 角色 → Anthropic `user` 角色 + `tool_result` content blocks
1866
+ * - `assistant` 带 toolCalls → text block + tool_use blocks
1867
+ * - 其他 → 直接映射 role + content
1868
+ *
1869
+ * 输入示例(统一 AIMessage[]):
1870
+ * ```ts
1871
+ * messages = [
1872
+ * { role: "user", content: "帮我点击提交按钮" },
1873
+ * { role: "assistant", content: "好的", toolCalls: [
1874
+ * { id: "toolu_01A", name: "dom", input: { action: "click", selector: "#btn" } }
1875
+ * ]},
1876
+ * { role: "tool", content: [
1877
+ * { toolCallId: "toolu_01A", result: "点击成功" }
1878
+ * ]}
1879
+ * ]
1880
+ * ```
1881
+ *
1882
+ * 输出示例(Anthropic 格式):
1883
+ * ```json
1884
+ * [
1885
+ * { "role": "user", "content": "帮我点击提交按钮" },
1886
+ * { "role": "assistant", "content": [
1887
+ * { "type": "text", "text": "好的" },
1888
+ * { "type": "tool_use", "id": "toolu_01A", "name": "dom", "input": { "action": "click", "selector": "#btn" } }
1889
+ * ]},
1890
+ * { "role": "user", "content": [
1891
+ * { "type": "tool_result", "tool_use_id": "toolu_01A", "content": "点击成功" }
1892
+ * ]}
1893
+ * ]
1894
+ * ```
1895
+ *
1896
+ * @param messages - 统一消息列表
1897
+ * @returns Anthropic 格式的消息数组
1589
1898
  */
1590
1899
  function convertMessages(messages) {
1591
1900
  return messages.filter((m) => m.role !== "system").map((m) => {
@@ -1621,7 +1930,65 @@ function convertMessages(messages) {
1621
1930
  });
1622
1931
  }
1623
1932
  /**
1624
- * 解析 Anthropic SSE(中)/ Parse Anthropic SSE stream (EN).
1933
+ * 解析 Anthropic SSE 流式响应(中)/ Parse Anthropic SSE stream into unified AIChatResponse (EN).
1934
+ *
1935
+ * 事件处理流程:
1936
+ * - `message_start` → 提取 input_tokens
1937
+ * - `content_block_start` → 识别 tool_use 块,开始累积工具参数 JSON
1938
+ * - `content_block_delta` → 累积文本(text_delta)或工具参数片段(input_json_delta)
1939
+ * - `content_block_stop` → 完成当前工具调用,解析参数 JSON
1940
+ * - `message_delta` → 提取 output_tokens
1941
+ *
1942
+ * SSE 流示例(含工具调用):
1943
+ * ```
1944
+ * event: message_start
1945
+ * data: {"type":"message_start","message":{"usage":{"input_tokens":1500}}}
1946
+ *
1947
+ * event: content_block_start
1948
+ * data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}
1949
+ *
1950
+ * event: content_block_delta
1951
+ * data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"好的,我来点"}}
1952
+ *
1953
+ * event: content_block_delta
1954
+ * data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"击提交按钮。"}}
1955
+ *
1956
+ * event: content_block_stop
1957
+ * data: {"type":"content_block_stop","index":0}
1958
+ *
1959
+ * event: content_block_start
1960
+ * data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01A09q","name":"dom"}}
1961
+ *
1962
+ * event: content_block_delta
1963
+ * data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"action\":"}}
1964
+ *
1965
+ * event: content_block_delta
1966
+ * data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"\"click\",\"selector\":\"#submit-btn\"}"}}
1967
+ *
1968
+ * event: content_block_stop
1969
+ * data: {"type":"content_block_stop","index":1}
1970
+ *
1971
+ * event: message_delta
1972
+ * data: {"type":"message_delta","usage":{"output_tokens":62}}
1973
+ *
1974
+ * event: message_stop
1975
+ * data: {"type":"message_stop"}
1976
+ * ```
1977
+ *
1978
+ * 最终输出(统一 AIChatResponse):
1979
+ * ```json
1980
+ * {
1981
+ * "text": "好的,我来点击提交按钮。",
1982
+ * "toolCalls": [{ "id": "toolu_01A09q", "name": "dom", "input": { "action": "click", "selector": "#submit-btn" } }],
1983
+ * "usage": { "inputTokens": 1500, "outputTokens": 62 }
1984
+ * }
1985
+ * ```
1986
+ *
1987
+ * 注意:Anthropic SSE 不发送 `[DONE]`,因此 `stopOnDone` 设为 false,
1988
+ * 依赖流关闭来结束消费。
1989
+ *
1990
+ * @param response - Anthropic API 的流式 HTTP 响应
1991
+ * @returns 统一的 AIChatResponse(文本 + 工具调用 + usage)
1625
1992
  */
1626
1993
  async function parseAnthropicStream(response) {
1627
1994
  if (!response.body) return parseAnthropicResponse(await response.json());
@@ -1678,7 +2045,7 @@ async function parseAnthropicStream(response) {
1678
2045
  }
1679
2046
 
1680
2047
  //#endregion
1681
- //#region src/core/ai-client/deepseek.ts
2048
+ //#region src/core/ai-client/models/deepseek.ts
1682
2049
  /**
1683
2050
  * DeepSeek 客户端封装(中)/ DeepSeek client wrapper (EN).
1684
2051
  *
@@ -1691,7 +2058,7 @@ async function parseAnthropicStream(response) {
1691
2058
  var DeepSeekClient = class extends OpenAIClient {};
1692
2059
 
1693
2060
  //#endregion
1694
- //#region src/core/ai-client/doubao.ts
2061
+ //#region src/core/ai-client/models/doubao.ts
1695
2062
  /**
1696
2063
  * Doubao 客户端封装(中)/ Doubao client wrapper (EN).
1697
2064
  *
@@ -1704,7 +2071,7 @@ var DeepSeekClient = class extends OpenAIClient {};
1704
2071
  var DoubaoClient = class extends OpenAIClient {};
1705
2072
 
1706
2073
  //#endregion
1707
- //#region src/core/ai-client/qwen.ts
2074
+ //#region src/core/ai-client/models/qwen.ts
1708
2075
  /**
1709
2076
  * Qwen 客户端封装(中)/ Qwen client wrapper (EN).
1710
2077
  *
@@ -1717,7 +2084,7 @@ var DoubaoClient = class extends OpenAIClient {};
1717
2084
  var QwenClient = class extends OpenAIClient {};
1718
2085
 
1719
2086
  //#endregion
1720
- //#region src/core/ai-client/minimax.ts
2087
+ //#region src/core/ai-client/models/minimax.ts
1721
2088
  /**
1722
2089
  * MiniMax 客户端封装(中)/ MiniMax client wrapper (EN).
1723
2090
  *
@@ -1914,7 +2281,7 @@ function buildSystemPrompt(params = {}) {
1914
2281
  "- listeners=\"...\" = bound event handlers (abbrevs below). Prefer targets with matching listeners.",
1915
2282
  "- Click target MUST have click signal: listeners containing clk/pdn/mdn, or onclick attr, or native <a>/<button>, or role=button/link. NEVER click elements with only blr/fcs (focus/blur) — they are not click targets.",
1916
2283
  "- If the text you want to click has no click signal, look at its parent row/container or nearby sibling that does have clk listener.",
1917
- "- No-effect fallback: if a click produced no page change (snapshot unchanged), do NOT repeat the same target. Instead: (1) look for <a> links or <button> inside the clicked container; (2) try a parent or sibling with stronger click signal; (3) as last resort, try navigate.goto to the target URL if visible in an href.",
2284
+ "- No-effect fallback: if a click produced no page change (snapshot unchanged), do NOT repeat the same target. Instead: (1) look for <a> links or <button> inside the clicked container; (2) try a parent or sibling with stronger click signal; (3) try a completely different approach (e.g., search, filter, sidebar navigation, or use evaluate to trigger the action programmatically).",
1918
2285
  "- Batch fill/type/check/select_option freely within one round. A click always ends the round — send at most ONE click as the LAST action in a batch.",
1919
2286
  "- Input order (MANDATORY): focus/click → fill/type/select_option per target. Multi-field: focus A→fill A→focus B→fill B.",
1920
2287
  "- Search/filter inputs: after fill, press Enter (or click search button) to trigger the search. Do not assume fill alone submits.",