npm - agentpage - Versions diffs - 0.0.15 → 0.0.16 - Mend

agentpage 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -148,6 +148,7 @@ AI 每一轮不是“凭记忆猜页面”，而是基于最新快照选择可
 - 模型可在文本中返回：
   - `REMAINING: <剩余内容>`：表示还有任务要继续
   - `REMAINING: DONE`：表示剩余任务已空
+- 注意：模型在 `tool_calls` 轮可能返回空 `content`；这不代表任务结束。
 ### 3) 批量但不跨变更链式执行
@@ -180,6 +181,7 @@ AI 每一轮不是“凭记忆猜页面”，而是基于最新快照选择可
 - `Current remaining instruction`（当前剩余任务）
 - `Previous round planned task array`（上一轮已执行任务）
+- `Previous round model output (normalized)`（上一轮模型输出归一化摘要）
 - `Latest DOM snapshot`（当前快照）
 说明：
@@ -195,6 +197,9 @@ AI 每一轮不是“凭记忆猜页面”，而是基于最新快照选择可
   - `REMAINING: <new remaining instruction>`
   - 或 `REMAINING: DONE`
+实现细节：
+- 若该轮返回 `tool_calls` 且 `content` 为空，loop 仍以“工具执行结果”推进状态，不把空文本当完成信号。
 ### 3) 每轮执行与状态推进
 loop 对本轮返回做以下处理：
@@ -205,7 +210,11 @@ loop 对本轮返回做以下处理：
 4. 刷新快照进入下一轮
 5. 更新下一轮任务文本：
   - 优先使用 `REMAINING`
-  - 若缺失 `REMAINING`，保持当前任务不推进（按协议回退）
+  - 若缺失 `REMAINING` 且本轮有执行动作：按线性任务剔除做启发式推进（避免整段原任务重复）
+  - 若缺失 `REMAINING` 且本轮无执行进展：保持当前任务不推进（按协议回退）
+6. 若“remaining 未完成 + 无工具调用”：
+  - 不直接结束
+  - 下一轮注入 `Protocol violation` 强约束提示，要求“要么给可执行工具调用，要么严格 `REMAINING: DONE`”
 ### 3.1) 找不到元素重试流（Not-found Retry Dialogue）
@@ -223,7 +232,7 @@ loop 对本轮返回做以下处理：
 ### 4) 停机条件
-- 无工具调用
+- 无工具调用且 remaining 已完成（或明确 `REMAINING: DONE`）
 - `REMAINING: DONE` 后自然收敛
 - 重复批次防自转触发
 - 达到 `maxRounds`
@@ -274,6 +283,7 @@ loop 对本轮返回做以下处理：
 - `Current remaining instruction`
 - `Done steps (do NOT repeat)`
 - `Previous round planned task array`
+- `Previous round model output (normalized)`
 - `Latest DOM snapshot`
 这层是“每轮变化”的动态上下文。
@@ -285,7 +295,8 @@ loop 对本轮返回做以下处理：
 - 首轮使用前端注入的 `initialSnapshot`
 - 每轮执行后刷新快照
 - 推进 `remainingInstruction`
-- `REMAINING` 缺失时不推进任务（保持当前 remaining）
+- `REMAINING` 缺失且本轮有执行动作时：按线性任务剔除做启发式推进
+- `REMAINING` 缺失且本轮无执行进展时：保持当前 remaining
 - 防空转、防重复、防无限循环
 - DOM 变更动作触发强制断轮（等待下一轮新快照）
@@ -384,7 +395,7 @@ sequenceDiagram
 主流程位于 `src/core/agent-loop/index.ts`：
 1. 确保当前快照可用
-2. 构建紧凑消息（原始目标 + done steps + 最新快照）
+2. 构建紧凑消息（remaining + 执行历史 + 上轮模型输出 + 最新快照）
 3. 调用 AI
 4. 执行工具调用并记录 trace
 5. 运行保护机制
@@ -392,20 +403,24 @@ sequenceDiagram
 ### 渐进式执行状态（新增）
-`src/core/agent-loop/index.ts` 内部维护 3 个关键状态：
+`src/core/agent-loop/index.ts` 内部维护 5 个关键状态：
 - `remainingInstruction`：当前轮次待消费文本（初始值为用户原始输入）
 - `previousRoundTasks`：上一轮执行任务数组
+- `previousRoundPlannedTasks`：上一轮模型给出的计划批次（执行前）
+- `previousRoundModelOutput`：上一轮模型输出归一化摘要（执行后供下轮输入）
 - `lastPlannedBatchKey`：用于识别是否连续两轮给出完全相同的任务批次
 停机规则：
-- 若模型返回无工具调用 → 直接结束
+- 若模型返回无工具调用且 remaining 未完成 → 不直接结束，进入协议修复轮
+- 若模型返回无工具调用且 remaining 已完成（或 `REMAINING: DONE`）→ 结束
 - 若连续两轮规划出相同任务批次，且上一轮无错误 → 自动终止，防止自转
 - 若模型文本包含 `REMAINING: DONE`，通常下一轮会自然进入“无工具调用总结”并结束
 ### 紧凑消息结构
 由 `messages.ts` 构建，核心语义：
-- Master goal：用户原始任务（永远保留）
+- Round 0：用户原始任务 + 首轮快照
+- Round 1+：剩余任务 + done steps + 上轮计划批次 + 上轮模型输出归一化 + 最新快照
 - Done steps：已完成动作（避免重复）
 - Execution context + latest snapshot：当前可执行范围
@@ -458,6 +473,14 @@ sequenceDiagram
 通过 `ToolRegistry` 统一暴露给模型，执行结果标准化返回。
+### Playwright 对齐说明（当前实现）
+- `dom.click`：采用更完整的点击事件链（`pointerdown/mousedown/pointerup/mouseup/click`）。
+- `dom.select_option`：支持 `value/label/index`；结果返回显式 `value + label`。
+- `dom.fill`：不允许用于 `checkbox/radio/file/button/submit/reset` 等不兼容输入类型。
+- `wait.wait_for_selector`：支持 `state=attached|visible|hidden|detached`（默认 `attached`）。
+- 快照运行态增强：可见 `select val`、`option selected`、`checked`、`disabled`、`readonly`，减少重复操作。
 ---
 ## 扩展与自定义

package/dist/index.mjs CHANGED Viewed

@@ -162,7 +162,7 @@ function formatToolResultBrief(result) {
 * - `previousRoundTasks`：上一轮已执行的任务数组，避免重复计划。
 * - 消息中要求模型输出 `REMAINING: ...` 或 `REMAINING: DONE`，供下一轮继续消费。
 */
-function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks) {
+function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint) {
 	const messages = history ? [...history] : [];
 	const allowAgentUiInteraction = isExplicitAgentUiRequest(userMessage);
 	const activeInstruction = remainingInstruction && remainingInstruction.trim() ? remainingInstruction.trim() : userMessage;
@@ -176,6 +176,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
 		];
 		if (currentUrl) parts.push("", `URL: ${currentUrl}`);
 		if (latestSnapshot) parts.push("", "## Current page snapshot", "Apply task-reduction model directly from this snapshot. Do NOT restate the task.", "Use hash IDs (e.g. #a1b2c) from the snapshot as selector params.", "Do NOT call page_info (get_url/get_title/query_all/snapshot).", "Batch independent visible actions in one round.", "If action changes DOM (open modal/navigate), stop that batch and continue next round.", "For dropdown/select fields, use dom with action=select_option (or fill on a select).", allowAgentUiInteraction ? "User explicitly asked to operate AutoPilot UI. You may interact with chat input/send/dock only as requested." : "Do NOT interact with any AI chat UI elements (chat input, send button, dock). Only operate on the actual page content.", "Output one line: REMAINING: <new remaining task after this round> or REMAINING: DONE", wrapSnapshot(latestSnapshot));
+		if (protocolViolationHint) parts.push("", protocolViolationHint);
 		messages.push({
 			role: "user",
 			content: parts.join("\n")
@@ -215,6 +216,8 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
 	if (hasErrors) contextParts.push("", "The last step failed. Retry with a different approach, or skip and continue with other visible targets.");
 	else contextParts.push("", "If the goal is fully done, reply with a short summary (no tool calls).");
 	if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous round planned task array (already executed):", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`));
+	if (previousRoundPlannedTasks && previousRoundPlannedTasks.length > 0) contextParts.push("", "Previous round model planned task array (before execution):", ...previousRoundPlannedTasks.map((task, index) => `${index + 1}. ${task}`));
+	if (previousRoundModelOutput) contextParts.push("", "Previous round model output (normalized, for task reduction input):", previousRoundModelOutput);
 	contextParts.push("", "After this round, include one plain text line:", "REMAINING: <new remaining instruction after this-round actions>", "or REMAINING: DONE");
 	const lastEntry = trace[trace.length - 1];
 	if (hasToolError(lastEntry.result)) {
@@ -222,6 +225,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
 		if (stripped && stripped.length < 300) contextParts.push("", "Last error: " + stripped);
 	}
 	if (currentUrl) contextParts.push("", `URL: ${currentUrl}`);
+	if (protocolViolationHint) contextParts.push("", protocolViolationHint);
 	if (latestSnapshot) contextParts.push("", "## Latest DOM snapshot", "Use hash IDs from this snapshot. Do NOT call page_info — this is already the latest.", wrapSnapshot(latestSnapshot));
 	messages.push({
 		role: "user",
@@ -385,9 +389,12 @@ async function executeAgentLoop(params) {
 	let outputTokens = 0;
 	let remainingInstruction = message.trim();
 	let previousRoundTasks = [];
+	let previousRoundPlannedTasks = [];
+	let previousRoundModelOutput = "";
 	let lastPlannedBatchKey = "";
 	let consecutiveSamePlannedBatch = 0;
 	let lastRoundHadError = false;
+	let protocolViolationHint;
 	let recoveryCount = 0;
 	let redundantInterceptCount = 0;
 	let pendingNotFoundRetry;
@@ -449,6 +456,20 @@ async function executeAgentLoop(params) {
 		return `${tc.name}:${inputText}`;
 	});
 	/**
+	* Normalize the model's text output into a short summary for the next round's input.
+	*
+	* Behavior:
+	* - Returns "" when `text` is falsy or contains only whitespace.
+	* - If the text contains a `REMAINING:` marker (case-insensitive, optional whitespace
+	*   around the colon), returns `REMAINING: <rest>`, where <rest> is everything after the
+	*   first such marker, trimmed.
+	* - Otherwise returns the first paragraph (the text before the first blank line), trimmed
+	*   and cut to at most 220 characters, so long planning text does not spill into the
+	*   next round's prompt.
+	*
+	* NOTE(review): the capture group is anchored at `$` and matches any character including
+	* newlines, so on the REMAINING path it keeps everything through the end of the text
+	* (possibly several lines), not only the remainder of the REMAINING line, and the
+	* 220-character cap is not applied there. Confirm this is intended.
+	*
+	* @param {string} [text] Raw model text for the round; may be empty or missing.
+	* @returns {string} Normalized summary, or "" when there is nothing to carry over.
+	*/
+	const normalizeModelOutput = (text) => {
+		if (!text) return ""; // null / undefined / empty string
+		const trimmed = text.trim();
+		if (!trimmed) return ""; // whitespace-only text
+		const remainingMatch = trimmed.match(/REMAINING\s*:\s*([\s\S]*)$/i); // no `g` flag: the leftmost marker wins
+		if (remainingMatch) return `REMAINING: ${remainingMatch[1].trim()}`; // re-emit with a canonical prefix
+		return (trimmed.split(/\n\s*\n/)[0]?.trim() ?? trimmed).slice(0, 220); // split() always yields at least one element, so `?? trimmed` is only a defensive fallback
+	};
+	/**
 	* 判定动作是否会触发 DOM 结构变化（中）/ Whether action may cause DOM-shape change (EN).
 	*
 	* 触发后应强制断轮，等待下一轮新快照继续。
@@ -490,8 +511,8 @@ async function executeAgentLoop(params) {
 	/**
 	* 推进下一轮描述（中）/ Derive next-round instruction from model text (EN).
 	*
-	* 优先 REMAINING 协议；若未提供，则把本轮 content 视为“更新后的任务描述”。
-	* Priority: REMAINING protocol first; otherwise treat current content as updated instruction.
+	* 优先 REMAINING 协议；若未提供，则保持当前 remaining 不变。
+	* Priority: REMAINING protocol first; otherwise keep current remaining instruction unchanged.
 	*/
 	const deriveNextInstruction = (text, currentInstruction) => {
 		const parsed = parseRemainingInstruction(text);
@@ -504,12 +525,26 @@ async function executeAgentLoop(params) {
 			hasRemainingProtocol: false
 		};
 	};
+	/**
+	* Heuristically shrink the remaining instruction when the model did not provide REMAINING.
+	*
+	* The instruction is treated as a linear sequence of steps. Whitespace runs are collapsed,
+	* arrows (`->`, `=>`, `→`) and the punctuation marks `，` `,` `。` `；` `;` are rewritten to
+	* the connector `然后`, and the text is then split on the connectors
+	* 然后 / 再 / 并且 / 并 / 接着 / 随后 / 之后. The first `executedCount` segments are dropped
+	* and the rest are re-joined with " -> ".
+	*
+	* Return cases:
+	* - `currentInstruction` unchanged when it is blank, when `executedCount <= 0`, or when the
+	*   split yields at most one segment (nothing to reduce).
+	* - "" when `executedCount` covers every segment.
+	* - Otherwise the surviving segments joined with " -> ".
+	*
+	* NOTE(review): the single-character connectors 再 and 并 are matched anywhere in the text,
+	* including inside longer words (for example 合并), and commas inside values are treated as
+	* step separators as well, so the segment count can exceed the real number of steps.
+	* NOTE(review): the reduction assumes one executed tool call corresponds to one segment;
+	* nothing in this function verifies that mapping.
+	*
+	* @param {string} currentInstruction Remaining instruction text carried into this round.
+	* @param {number} executedCount Number of actions executed this round.
+	* @returns {string} Reduced instruction, "" when fully consumed, or the input unchanged.
+	*/
+	const reduceRemainingHeuristically = (currentInstruction, executedCount) => {
+		if (!currentInstruction.trim() || executedCount <= 0) return currentInstruction; // nothing to reduce or no progress this round
+		const parts = currentInstruction.replace(/\s+/g, " ").replace(/(->|=>|→)/g, " 然后 ").replace(/[，,。；;]/g, " 然后 ").split(/\s*(?:然后|再|并且|并|接着|随后|之后)\s*/g).map((part) => part.trim()).filter(Boolean); // unify separators into one connector, split into steps, drop empty segments
+		if (parts.length <= 1) return currentInstruction; // not a multi-step instruction: leave it untouched
+		const nextParts = parts.slice(Math.min(executedCount, parts.length)); // drop as many leading segments as actions were executed
+		if (nextParts.length === 0) return ""; // every segment consumed
+		return nextParts.join(" -> ");
+	};
 	for (let round = 0; round < maxRounds; round++) {
 		callbacks?.onRound?.(round);
 		usedRounds = round + 1;
 		if (!pageContext.latestSnapshot) await refreshSnapshot();
 		const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
-		const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks);
+		const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint);
 		if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
 			role: "user",
 			content: [
@@ -528,8 +563,7 @@ async function executeAgentLoop(params) {
 		});
 		inputTokens += response.usage?.inputTokens ?? 0;
 		outputTokens += response.usage?.outputTokens ?? 0;
-		const nextInstructionState = deriveNextInstruction(response.text, remainingInstruction);
-		remainingInstruction = nextInstructionState.nextInstruction;
+		const parsedInstructionState = deriveNextInstruction(response.text, remainingInstruction);
 		if (!response.toolCalls || response.toolCalls.length === 0) {
 			if (pendingNotFoundRetry) {
 				const unresolvedHint = response.text?.toLowerCase() ?? "";
@@ -545,10 +579,29 @@ async function executeAgentLoop(params) {
 				}
 				pendingNotFoundRetry = void 0;
 			}
+			if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
+			if (remainingInstruction.trim().length > 0 && round < maxRounds - 1) {
+				protocolViolationHint = [
+					"Protocol violation in previous round:",
+					"- Remaining task is not DONE, but no tool calls were returned.",
+					"This round MUST do one of:",
+					"1) Return actionable tool calls for visible targets; or",
+					"2) If truly complete, return a short summary and EXACTLY `REMAINING: DONE`.",
+					"Do NOT output planning/explaining text."
+				].join("\n");
+				lastRoundHadError = true;
+				await refreshSnapshot();
+				continue;
+			}
 			finalReply = response.text ?? "";
 			if (finalReply) callbacks?.onText?.(finalReply);
 			break;
 		}
+		protocolViolationHint = void 0;
+		const plannedTasksCurrentRound = buildTaskArray(response.toolCalls.map((tc) => ({
+			name: tc.name,
+			input: tc.input
+		})));
 		const plannedBatchKey = JSON.stringify(response.toolCalls.map((tc) => ({
 			name: tc.name,
 			input: tc.input
@@ -617,9 +670,16 @@ async function executeAgentLoop(params) {
 			tasks: roundMissingTasks
 		};
 		else pendingNotFoundRetry = void 0;
-		if (!nextInstructionState.hasRemainingProtocol) roundHasError = true;
+		if (parsedInstructionState.hasRemainingProtocol) remainingInstruction = parsedInstructionState.nextInstruction;
+		else {
+			const nextByHeuristic = reduceRemainingHeuristically(remainingInstruction, executedTaskCalls.length);
+			if (nextByHeuristic !== remainingInstruction) remainingInstruction = nextByHeuristic;
+			else roundHasError = true;
+		}
+		previousRoundModelOutput = parsedInstructionState.hasRemainingProtocol ? normalizeModelOutput(response.text) : `REMAINING: ${remainingInstruction || "DONE"}`;
 		lastRoundHadError = roundHasError;
 		previousRoundTasks = buildTaskArray(executedTaskCalls);
+		previousRoundPlannedTasks = plannedTasksCurrentRound;
 		const idleResult = detectIdleLoop(executedTaskCalls.map((tc) => tc.name), consecutiveReadOnlyRounds);
 		if (idleResult === -1) {
 			finalReply = response.text || "任务已完成。";