agentpage 0.0.45 → 0.0.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/dist/index.mjs +49 -4
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -9,7 +9,10 @@
|
|
|
9
9
|
> 核心主张:通过 **Prompt + Tools + 路由**,快速为网站实现 AI 赋能,并构建**前端运行时 AI Skill**。AutoPilot 本质上是一个运行在前端浏览器中的 AI Agent。
|
|
10
10
|
|
|
11
11
|
[](LICENSE)
|
|
12
|
+
<a href="https://www.typescriptlang.org/"><img src="https://img.shields.io/badge/language-TypeScript-3178C6" alt="TypeScript" /></a>
|
|
12
13
|
<a href="https://www.npmjs.com/package/agentpage"><img src="https://img.shields.io/npm/v/agentpage" alt="npm" /></a>
|
|
14
|
+
<a href="https://www.npmjs.com/package/agentpage"><img src="https://img.shields.io/npm/dm/agentpage" alt="downloads" /></a>
|
|
15
|
+
<a href="https://bundlephobia.com/package/agentpage"><img src="https://img.shields.io/bundlephobia/minzip/agentpage" alt="minzipped size" /></a>
|
|
13
16
|
|
|
14
17
|
AutoPilot 的目标不是生成文本,而是在浏览器中完成真实任务:点击、填写、导航、等待、执行脚本,并在每一轮根据最新页面状态持续推进。
|
|
15
18
|
|
|
@@ -76,7 +79,7 @@ npm install agentpage
|
|
|
76
79
|
|
|
77
80
|
- **Prompt + Tools + 路由三层解耦**:可以快速把"可执行 AI 能力"植入现有前端系统,按路由渐进式接入,支持"项目级工具 + 路由级工具"组合。
|
|
78
81
|
- **增量任务消费协议(REMAINING)**:任务不是一次性执行,而是逐轮消费收敛。每轮只做当前快照可执行的动作,通过 `REMAINING` 协议跟踪进度,支持协议修复和启发式回退,确保复杂多步任务稳定收敛。
|
|
79
|
-
- **
|
|
82
|
+
- **9 层保护机制**:冗余拦截、快照防抖、元素恢复、Not-found 重试对话流、导航刷新、空转检测、重复批次防自转、协议修复、快照指纹变化检测 —— 目标是**稳定收敛**,而不是偶然成功。
|
|
80
83
|
- **Playwright 级别交互语义**:完整 pointer/mouse 事件链、4 种 scrollIntoView 策略轮换、actionability 五重检查(可见/稳定/可用/可编辑/遮挡)、智能重定向 retarget、隐藏控件代理点击(ElementPlus/AntD)、`select_option` value/label/index 三策略。
|
|
81
84
|
- **运行时事件信号追踪**:通过 `EventTarget.prototype` 补丁全局追踪事件绑定,快照中输出 `listeners="clk,inp,chg"` 信号,帮助 AI 精准识别真实可交互元素,而非猜测。
|
|
82
85
|
- **效果验证机制(Effect Check)**:每轮行动前自动检查上轮操作是否在当前快照中生效,未生效则尝试邻近元素,避免重复点击无效目标。
|
|
@@ -229,6 +232,7 @@ Round 3: 执行 C → REMAINING: DONE
|
|
|
229
232
|
| 重复批次防自转 | 连续相同任务批次且上一轮无错误 | 连续两轮注入 `Repeated action warning` 提示;连续三轮直接终止 |
|
|
230
233
|
| 协议修复回合 | remaining 未完成却无工具调用 | 注入强约束提示 |
|
|
231
234
|
| 轮次稳定等待 | 本轮有 DOM 变化动作 | loading hidden + DOM quiet(200ms/4s) |
|
|
235
|
+
| 快照指纹变化检测 | 本轮有 DOM 变更动作且行动后指纹不变 | 注入 `Snapshot unchanged` 提示,强制模型换目标 |
|
|
232
236
|
|
|
233
237
|
### 5. 停机条件
|
|
234
238
|
|
package/dist/index.mjs
CHANGED
|
@@ -88,6 +88,29 @@ function extractHashSelectorRef(toolInput) {
|
|
|
88
88
|
return m ? m[1] : null;
|
|
89
89
|
}
|
|
90
90
|
/**
|
|
91
|
+
* Snapshot fingerprint computation — used to detect snapshot changes between rounds.
|
|
92
|
+
*
|
|
93
|
+
* An element's #hashID (e.g. `#1kry9hw`) may change when the DOM is re-rendered,
|
|
94
|
+
* even though the actual page content has not changed. So every hashID is first replaced with the placeholder `#_`,
|
|
95
|
+
* and then the djb2 hash is computed, so the fingerprint only reflects real differences in page structure and text.
|
|
96
|
+
*
|
|
97
|
+
* Usage: compute the fingerprint once before and once after a round's actions; if they match, the actions produced no visible effect.
* Note: any "#" followed by 4 or more characters from [a-z0-9] (case-insensitive) is treated as a hashID and normalized.
* @param {string} snapshot Snapshot text; a falsy value (e.g. "" or undefined) yields "".
* @returns {string} Base-36 string produced by _djb2 over the normalized snapshot, or "" for a falsy snapshot.
|
|
98
|
+
*/
|
|
99
|
+
function computeSnapshotFingerprint(snapshot) {
|
|
100
|
+
if (!snapshot) return "";
|
|
101
|
+
return _djb2(snapshot.replace(/#[a-z0-9]{4,}/gi, "#_"));
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* djb2 string hash (non-cryptographic).
|
|
105
|
+
*
|
|
106
|
+
* Used purely for comparing snapshot fingerprints; not for security purposes.
* Starting from 5381, each UTF-16 code unit c updates the state as hash = hash * 33 + c
* (written as (hash << 5) + hash + c), wrapped to a signed 32-bit integer by "| 0" on every step.
* @param {string} str Text to hash; an empty string returns the base-36 encoding of 5381.
* @returns {string} The final state reinterpreted as an unsigned 32-bit integer (>>> 0) and encoded in base 36.
|
|
107
|
+
*/
|
|
108
|
+
function _djb2(str) {
|
|
109
|
+
let hash = 5381;
|
|
110
|
+
for (let i = 0; i < str.length; i++) hash = (hash << 5) + hash + str.charCodeAt(i) | 0;
|
|
111
|
+
return (hash >>> 0).toString(36);
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
91
114
|
* 构建任务数组。
|
|
92
115
|
*
|
|
93
116
|
* 作用:把一轮工具调用规整成稳定字符串数组,
|
|
@@ -542,7 +565,7 @@ function buildCompactMessages(userMessage, trace, latestSnapshot, currentUrl, hi
|
|
|
542
565
|
];
|
|
543
566
|
if (hasErrors) contextParts.push("", "Last step failed. Retry differently or skip to other targets.");
|
|
544
567
|
else contextParts.push("", "If fully done, reply summary only (no tools).");
|
|
545
|
-
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous executed:", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`), "If any had no visible effect, try a
|
|
568
|
+
if (previousRoundTasks && previousRoundTasks.length > 0) contextParts.push("", "Previous executed:", ...previousRoundTasks.map((task, index) => `${index + 1}. ${task}`), "If any had no visible effect (snapshot unchanged), do NOT repeat — try a child <a>/<button> inside the target, or a sibling/parent with stronger click signal.");
|
|
546
569
|
if (previousRoundPlannedTasks && previousRoundPlannedTasks.length > 0) contextParts.push("", "Previous planned:", ...previousRoundPlannedTasks.map((task, index) => `${index + 1}. ${task}`));
|
|
547
570
|
if (previousRoundModelOutput) contextParts.push("", "Previous model output:", previousRoundModelOutput);
|
|
548
571
|
const lastEntry = trace[trace.length - 1];
|
|
@@ -894,6 +917,7 @@ async function executeAgentLoop(params) {
|
|
|
894
917
|
callbacks?.onRound?.(round);
|
|
895
918
|
usedRounds = round + 1;
|
|
896
919
|
if (!pageContext.latestSnapshot) await refreshSnapshot();
|
|
920
|
+
const roundStartFingerprint = computeSnapshotFingerprint(pageContext.latestSnapshot || "");
|
|
897
921
|
const effectivePrompt = stripSnapshotFromPrompt(systemPrompt);
|
|
898
922
|
const chatMessages = buildCompactMessages(message, fullToolTrace, pageContext.latestSnapshot, pageContext.currentUrl, history, remainingInstruction, previousRoundTasks, previousRoundModelOutput, previousRoundPlannedTasks, protocolViolationHint);
|
|
899
923
|
if (pendingNotFoundRetry && pendingNotFoundRetry.tasks.length > 0) chatMessages.push({
|
|
@@ -965,11 +989,21 @@ async function executeAgentLoop(params) {
|
|
|
965
989
|
consecutiveSamePlannedBatch = 1;
|
|
966
990
|
lastPlannedBatchKey = plannedBatchKey;
|
|
967
991
|
}
|
|
968
|
-
if (consecutiveSamePlannedBatch >=
|
|
992
|
+
if (consecutiveSamePlannedBatch >= 3 && !lastRoundHadError) {
|
|
969
993
|
finalReply = response.text?.trim() || "任务已完成。";
|
|
970
994
|
if (finalReply) callbacks?.onText?.(finalReply);
|
|
971
995
|
break;
|
|
972
996
|
}
|
|
997
|
+
if (consecutiveSamePlannedBatch >= 2 && !lastRoundHadError) protocolViolationHint = [
|
|
998
|
+
"Repeated action warning:",
|
|
999
|
+
"- You performed the EXACT same tool call(s) as the previous round, but NO visible change occurred in the snapshot.",
|
|
1000
|
+
"The clicked element did not trigger navigation or DOM change. This round you MUST do ONE of:",
|
|
1001
|
+
"1) Look INSIDE the clicked container for an <a> link, <button>, or child element with clk/pdn/mdn listener, and click THAT instead;",
|
|
1002
|
+
"2) If there is a visible href/URL, use navigate.goto to go there directly;",
|
|
1003
|
+
"3) Try a completely different approach (e.g., search, filter, or navigate via sidebar);",
|
|
1004
|
+
"4) If the task is truly complete, return REMAINING: DONE with no tool calls.",
|
|
1005
|
+
"Do NOT repeat the same action again."
|
|
1006
|
+
].join("\n");
|
|
973
1007
|
if (dryRun) {
|
|
974
1008
|
finalReply = response.text ? response.text + "\n\n" : "";
|
|
975
1009
|
finalReply += "🔧 AI 请求调用以下工具(dry-run 模式,未执行):\n";
|
|
@@ -1072,6 +1106,17 @@ async function executeAgentLoop(params) {
|
|
|
1072
1106
|
consecutiveReadOnlyRounds = idleResult;
|
|
1073
1107
|
if (roundHasPotentialDomMutation) await runRoundStabilityBarrier();
|
|
1074
1108
|
await refreshSnapshot();
|
|
1109
|
+
if (roundHasPotentialDomMutation) {
|
|
1110
|
+
if (computeSnapshotFingerprint(pageContext.latestSnapshot || "") === roundStartFingerprint && roundStartFingerprint !== "") {
|
|
1111
|
+
const unchangedHint = [
|
|
1112
|
+
"Snapshot unchanged after action:",
|
|
1113
|
+
"- The page snapshot is IDENTICAL before and after your action(s) this round.",
|
|
1114
|
+
"- Your click/action had NO visible effect on the page. Do NOT repeat it.",
|
|
1115
|
+
"- Look INSIDE the target for <a>/<button>/child with clk listener, or use navigate.goto if href is visible."
|
|
1116
|
+
].join("\n");
|
|
1117
|
+
protocolViolationHint = protocolViolationHint ? protocolViolationHint + "\n\n" + unchangedHint : unchangedHint;
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1075
1120
|
}
|
|
1076
1121
|
const resultMessages = [...history ?? [], {
|
|
1077
1122
|
role: "user",
|
|
@@ -1869,7 +1914,7 @@ function buildSystemPrompt(params = {}) {
|
|
|
1869
1914
|
"- listeners=\"...\" = bound event handlers (abbrevs below). Prefer targets with matching listeners.",
|
|
1870
1915
|
"- Click target MUST have click signal: listeners containing clk/pdn/mdn, or onclick attr, or native <a>/<button>, or role=button/link. NEVER click elements with only blr/fcs (focus/blur) — they are not click targets.",
|
|
1871
1916
|
"- If the text you want to click has no click signal, look at its parent row/container or nearby sibling that does have clk listener.",
|
|
1872
|
-
"- No-effect fallback: try
|
|
1917
|
+
"- No-effect fallback: if a click produced no page change (snapshot unchanged), do NOT repeat the same target. Instead: (1) look for <a> links or <button> inside the clicked container; (2) try a parent or sibling with stronger click signal; (3) as last resort, try navigate.goto to the target URL if visible in an href.",
|
|
1873
1918
|
"- Batch fill/type/check/select_option freely within one round. A click always ends the round — send at most ONE click as the LAST action in a batch.",
|
|
1874
1919
|
"- Input order (MANDATORY): focus/click → fill/type/select_option per target. Multi-field: focus A→fill A→focus B→fill B.",
|
|
1875
1920
|
"- Search/filter inputs: after fill, press Enter (or click search button) to trigger the search. Do not assume fill alone submits.",
|
|
@@ -1877,7 +1922,7 @@ function buildSystemPrompt(params = {}) {
|
|
|
1877
1922
|
"- Steppers: compute delta from visible value, click exactly |delta| times. Check/uncheck: target real input control.",
|
|
1878
1923
|
"- DOM-changing action (click/modal/navigate): ends the round, next snapshot follows. Actions sent after a click in the same batch are discarded.",
|
|
1879
1924
|
"- Intermediate progress is NOT completion: if an action only opens, expands, reveals, filters, paginates, switches context, or loads the next step, keep REMAINING on the final user goal until the requested end state/value/content is visible in the snapshot.",
|
|
1880
|
-
"- Effect check: before planning new actions, confirm previous actions' expected effects are visible in current snapshot. If
|
|
1925
|
+
"- Effect check: before planning new actions, confirm previous actions' expected effects are visible in current snapshot. If the snapshot is unchanged after a click, the click FAILED — you MUST pick a different element (e.g., an <a> or <button> child inside the row, or the link text itself).",
|
|
1881
1926
|
"- Do NOT call page_info — snapshot is auto-refreshed and provided every round. Do NOT use get_text/get_attr to read what is already visible in the snapshot.",
|
|
1882
1927
|
"- Never repeat the same tool call (same name + same args) on the same target. If it didn't work, try a different approach.",
|
|
1883
1928
|
"- Dropdown/select: prefer dom.select_option (works in one round). For custom dropdowns requiring click-to-open: click → wait for next snapshot → click option (two rounds).",
|