agentpage 0.0.41 → 0.0.43

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only.
package/dist/index.mjs CHANGED
@@ -225,6 +225,32 @@ function isPotentialDomMutation(toolName, toolInput) {
225
225
  ].includes(action);
226
226
  }
227
227
  /**
228
+ * 判定动作是否为"确定性推进"——比 isPotentialDomMutation 更窄。
229
+ *
230
+ * 仅包含必定产生可见状态变化的动作:
231
+ * - 表单输入类:fill / type / select_option / clear / check / uncheck
232
+ * - 导航类:navigate.*
233
+ *
234
+ * click 不在此列——因为 click 可能点了但完全没效果(如点击无 click listener 的元素)。
235
+ *
236
+ * 用途:协议缺失豁免。仅当本轮有"确定性推进"时才豁免协议缺失计数,
237
+ * 避免模型反复点击无效目标导致死循环。
238
+ */
239
+ function isConfirmedProgressAction(toolName, toolInput) {
240
+ if (toolName === "navigate") return true;
241
+ if (toolName !== "dom") return false;
242
+ const action = getToolAction(toolInput);
243
+ if (!action) return false;
244
+ return [
245
+ "fill",
246
+ "type",
247
+ "select_option",
248
+ "clear",
249
+ "check",
250
+ "uncheck"
251
+ ].includes(action);
252
+ }
253
+ /**
228
254
  * 采集找不到元素任务。
229
255
  *
230
256
  * 返回 null 表示当前结果不属于“元素未找到”,
@@ -896,7 +922,7 @@ async function executeAgentLoop(params) {
896
922
  }
897
923
  pendingNotFoundRetry = void 0;
898
924
  }
899
- if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
925
+ if (parsedInstructionState.hasRemainingProtocol && parsedInstructionState.nextInstruction.trim().length === 0) remainingInstruction = "";
900
926
  if (remainingInstruction.trim().length > 0 && round < maxRounds - 1) {
901
927
  protocolViolationHint = [
902
928
  "Protocol violation in previous round:",
@@ -949,6 +975,7 @@ async function executeAgentLoop(params) {
949
975
  }
950
976
  let roundHasError = false;
951
977
  let roundHasPotentialDomMutation = false;
978
+ let roundHasConfirmedProgress = false;
952
979
  const executedTaskCalls = [];
953
980
  const roundMissingTasks = [];
954
981
  for (const tc of response.toolCalls) {
@@ -980,6 +1007,7 @@ async function executeAgentLoop(params) {
980
1007
  if (missingTask) roundMissingTasks.push(missingTask);
981
1008
  if (result.details && typeof result.details === "object") roundHasError = roundHasError || Boolean(result.details.error);
982
1009
  if (!hasToolError(result) && isPotentialDomMutation(tc.name, tc.input)) roundHasPotentialDomMutation = true;
1010
+ if (!hasToolError(result) && isConfirmedProgressAction(tc.name, tc.input)) roundHasConfirmedProgress = true;
983
1011
  if (tc.name === "page_info" && getToolAction(tc.input) === "snapshot") {
984
1012
  pageContext.latestSnapshot = toContentString(result.content);
985
1013
  recordSnapshotStats(pageContext.latestSnapshot);
@@ -1002,11 +1030,12 @@ async function executeAgentLoop(params) {
1002
1030
  remainingInstruction = nextByHeuristic;
1003
1031
  consecutiveNoProtocolRounds = 0;
1004
1032
  } else if (executedTaskCalls.length > 0) {
1005
- if (!roundHasPotentialDomMutation || roundHasError) consecutiveNoProtocolRounds += 1;
1033
+ if (!roundHasConfirmedProgress || roundHasError) consecutiveNoProtocolRounds += 1;
1006
1034
  }
1007
1035
  }
1008
1036
  previousRoundModelOutput = parsedInstructionState.hasRemainingProtocol ? normalizeModelOutput(response.text) : `REMAINING: ${remainingInstruction || "DONE"}`;
1009
1037
  lastRoundHadError = roundHasError;
1038
+ if (executedTaskCalls.length === 0 && response.toolCalls.length > 0) lastRoundHadError = true;
1010
1039
  previousRoundTasks = buildTaskArray(executedTaskCalls);
1011
1040
  previousRoundPlannedTasks = plannedTasksCurrentRound;
1012
1041
  if (parsedInstructionState.hasRemainingProtocol && remainingInstruction.trim().length === 0 && !roundHasError) {
@@ -1014,20 +1043,17 @@ async function executeAgentLoop(params) {
1014
1043
  if (finalReply) callbacks?.onText?.(finalReply);
1015
1044
  break;
1016
1045
  }
1017
- if (consecutiveNoProtocolRounds >= 3) {
1046
+ if (consecutiveNoProtocolRounds >= 5) {
1018
1047
  finalReply = response.text?.trim() || "任务已完成。";
1019
1048
  if (finalReply) callbacks?.onText?.(finalReply);
1020
1049
  break;
1021
1050
  }
1022
- if (consecutiveNoProtocolRounds >= 2) protocolViolationHint = [
1023
- "Protocol reminder: REMAINING protocol missing for 2+ rounds with tool calls.",
1051
+ if (consecutiveNoProtocolRounds >= 3) protocolViolationHint = [
1052
+ "Protocol reminder: REMAINING protocol missing for 3+ rounds with tool calls.",
1024
1053
  "You MUST include REMAINING: <text> or REMAINING: DONE in every response.",
1025
1054
  "If the task is fully complete, return REMAINING: DONE with no tool calls."
1026
1055
  ].join("\n");
1027
- const idleResult = detectIdleLoop(response.toolCalls.map((tc) => ({
1028
- name: tc.name,
1029
- input: tc.input
1030
- })), consecutiveReadOnlyRounds);
1056
+ const idleResult = detectIdleLoop(executedTaskCalls, consecutiveReadOnlyRounds);
1031
1057
  if (idleResult === -1) {
1032
1058
  finalReply = response.text?.trim() || "任务已完成。";
1033
1059
  if (finalReply) callbacks?.onText?.(finalReply);
@@ -1831,7 +1857,8 @@ function buildSystemPrompt(params = {}) {
1831
1857
  "- Only interactive elements carry #hashID; others are context-only and cannot be targeted.",
1832
1858
  "- Bracket tag may show ARIA role ([combobox], [slider]) as primary interaction hint.",
1833
1859
  "- listeners=\"...\" = bound event handlers (abbrevs below). Prefer targets with matching listeners.",
1834
- "- Click priority: clk/pdn/mdn, onclick, native link/button, role=button/link. Avoid focus-only or hover-only signals.",
1860
+ "- Click target MUST have click signal: listeners containing clk/pdn/mdn, or onclick attr, or native <a>/<button>, or role=button/link. NEVER click elements with only blr/fcs (focus/blur) — they are not click targets.",
1861
+ "- If the text you want to click has no click signal, look at its parent row/container or nearby sibling that does have clk listener.",
1835
1862
  "- No-effect fallback: try nearest actionable sibling/ancestor in same semantic group instead of repeating.",
1836
1863
  "- Batch fill/type/check/select_option freely within one round. A click always ends the round — send at most ONE click as the LAST action in a batch.",
1837
1864
  "- Input order (MANDATORY): focus/click → fill/type/select_option per target. Multi-field: focus A→fill A→focus B→fill B.",