browser-use 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -686
- package/dist/actor/element.d.ts +19 -0
- package/dist/actor/element.js +46 -0
- package/dist/actor/index.d.ts +4 -0
- package/dist/actor/index.js +4 -0
- package/dist/actor/mouse.d.ts +19 -0
- package/dist/actor/mouse.js +39 -0
- package/dist/actor/page.d.ts +29 -0
- package/dist/actor/page.js +88 -0
- package/dist/actor/utils.d.ts +4 -0
- package/dist/actor/utils.js +35 -0
- package/dist/agent/cloud-events.d.ts +18 -0
- package/dist/agent/cloud-events.js +65 -2
- package/dist/agent/gif.d.ts +1 -0
- package/dist/agent/gif.js +24 -2
- package/dist/agent/judge.d.ts +17 -0
- package/dist/agent/judge.js +197 -0
- package/dist/agent/message-manager/service.d.ts +12 -4
- package/dist/agent/message-manager/service.js +205 -39
- package/dist/agent/message-manager/utils.js +0 -1
- package/dist/agent/message-manager/views.d.ts +4 -0
- package/dist/agent/message-manager/views.js +11 -7
- package/dist/agent/prompts.d.ts +24 -3
- package/dist/agent/prompts.js +274 -59
- package/dist/agent/service.d.ts +103 -41
- package/dist/agent/service.js +2336 -472
- package/dist/agent/variable-detector.d.ts +12 -0
- package/dist/agent/variable-detector.js +211 -0
- package/dist/agent/views.d.ts +237 -18
- package/dist/agent/views.js +446 -33
- package/dist/browser/cloud/cloud.d.ts +20 -0
- package/dist/browser/cloud/cloud.js +129 -0
- package/dist/browser/cloud/index.d.ts +2 -0
- package/dist/browser/cloud/index.js +2 -0
- package/dist/browser/cloud/views.d.ts +41 -0
- package/dist/browser/cloud/views.js +35 -0
- package/dist/browser/events.d.ts +345 -0
- package/dist/browser/events.js +566 -0
- package/dist/browser/extensions.js +17 -17
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +4 -0
- package/dist/browser/profile.d.ts +10 -4
- package/dist/browser/profile.js +79 -12
- package/dist/browser/session-manager.d.ts +85 -0
- package/dist/browser/session-manager.js +208 -0
- package/dist/browser/session.d.ts +105 -9
- package/dist/browser/session.js +1166 -95
- package/dist/browser/types.d.ts +153 -156
- package/dist/browser/views.d.ts +39 -0
- package/dist/browser/views.js +32 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
- package/dist/browser/watchdogs/base.d.ts +21 -0
- package/dist/browser/watchdogs/base.js +81 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
- package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
- package/dist/browser/watchdogs/crash-watchdog.js +296 -0
- package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
- package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
- package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/dom-watchdog.js +31 -0
- package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
- package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
- package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
- package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
- package/dist/browser/watchdogs/index.d.ts +15 -0
- package/dist/browser/watchdogs/index.js +15 -0
- package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
- package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
- package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
- package/dist/browser/watchdogs/popups-watchdog.js +77 -0
- package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
- package/dist/browser/watchdogs/recording-watchdog.js +249 -0
- package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
- package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
- package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/security-watchdog.js +84 -0
- package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
- package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
- package/dist/cli.d.ts +7 -2
- package/dist/cli.js +182 -25
- package/dist/code-use/formatting.d.ts +3 -0
- package/dist/code-use/formatting.js +18 -0
- package/dist/code-use/index.d.ts +6 -0
- package/dist/code-use/index.js +6 -0
- package/dist/code-use/namespace.d.ts +5 -0
- package/dist/code-use/namespace.js +81 -0
- package/dist/code-use/notebook-export.d.ts +3 -0
- package/dist/code-use/notebook-export.js +56 -0
- package/dist/code-use/service.d.ts +24 -0
- package/dist/code-use/service.js +104 -0
- package/dist/code-use/utils.d.ts +4 -0
- package/dist/code-use/utils.js +98 -0
- package/dist/code-use/views.d.ts +108 -0
- package/dist/code-use/views.js +165 -0
- package/dist/config.d.ts +15 -0
- package/dist/config.js +109 -7
- package/dist/controller/registry/service.d.ts +10 -1
- package/dist/controller/registry/service.js +266 -10
- package/dist/controller/registry/views.d.ts +4 -1
- package/dist/controller/registry/views.js +25 -2
- package/dist/controller/service.d.ts +10 -1
- package/dist/controller/service.js +1814 -268
- package/dist/controller/views.d.ts +78 -155
- package/dist/controller/views.js +61 -12
- package/dist/dom/history-tree-processor/service.d.ts +5 -0
- package/dist/dom/history-tree-processor/service.js +169 -14
- package/dist/dom/history-tree-processor/view.d.ts +7 -1
- package/dist/dom/history-tree-processor/view.js +10 -1
- package/dist/dom/markdown-extractor.d.ts +37 -0
- package/dist/dom/markdown-extractor.js +345 -0
- package/dist/dom/service.d.ts +3 -1
- package/dist/dom/service.js +76 -0
- package/dist/dom/views.d.ts +1 -0
- package/dist/dom/views.js +45 -0
- package/dist/event-bus.d.ts +107 -7
- package/dist/event-bus.js +313 -10
- package/dist/exceptions.d.ts +0 -3
- package/dist/exceptions.js +0 -7
- package/dist/filesystem/file-system.d.ts +18 -0
- package/dist/filesystem/file-system.js +503 -42
- package/dist/index.d.ts +7 -0
- package/dist/index.js +6 -0
- package/dist/integrations/gmail/actions.d.ts +3 -3
- package/dist/integrations/gmail/actions.js +4 -4
- package/dist/llm/anthropic/chat.d.ts +18 -1
- package/dist/llm/anthropic/chat.js +123 -55
- package/dist/llm/anthropic/serializer.d.ts +2 -0
- package/dist/llm/anthropic/serializer.js +81 -9
- package/dist/llm/aws/chat-anthropic.d.ts +17 -0
- package/dist/llm/aws/chat-anthropic.js +126 -26
- package/dist/llm/aws/chat-bedrock.d.ts +28 -1
- package/dist/llm/aws/chat-bedrock.js +161 -34
- package/dist/llm/aws/serializer.d.ts +13 -1
- package/dist/llm/aws/serializer.js +56 -17
- package/dist/llm/azure/chat.d.ts +53 -2
- package/dist/llm/azure/chat.js +366 -54
- package/dist/llm/base.d.ts +2 -0
- package/dist/llm/browser-use/chat.d.ts +40 -0
- package/dist/llm/browser-use/chat.js +305 -0
- package/dist/llm/browser-use/index.d.ts +1 -0
- package/dist/llm/browser-use/index.js +1 -0
- package/dist/llm/cerebras/chat.d.ts +39 -0
- package/dist/llm/cerebras/chat.js +178 -0
- package/dist/llm/cerebras/index.d.ts +2 -0
- package/dist/llm/cerebras/index.js +2 -0
- package/dist/llm/cerebras/serializer.d.ts +7 -0
- package/dist/llm/cerebras/serializer.js +82 -0
- package/dist/llm/deepseek/chat.d.ts +19 -2
- package/dist/llm/deepseek/chat.js +138 -25
- package/dist/llm/google/chat.d.ts +46 -2
- package/dist/llm/google/chat.js +267 -64
- package/dist/llm/google/serializer.d.ts +9 -1
- package/dist/llm/google/serializer.js +141 -34
- package/dist/llm/groq/chat.d.ts +21 -2
- package/dist/llm/groq/chat.js +125 -26
- package/dist/llm/groq/parser.js +3 -1
- package/dist/llm/mistral/chat.d.ts +43 -0
- package/dist/llm/mistral/chat.js +154 -0
- package/dist/llm/mistral/index.d.ts +2 -0
- package/dist/llm/mistral/index.js +2 -0
- package/dist/llm/mistral/schema.d.ts +8 -0
- package/dist/llm/mistral/schema.js +27 -0
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.js +317 -0
- package/dist/llm/ollama/chat.d.ts +13 -1
- package/dist/llm/ollama/chat.js +110 -19
- package/dist/llm/ollama/serializer.d.ts +1 -0
- package/dist/llm/ollama/serializer.js +34 -12
- package/dist/llm/openai/chat.d.ts +16 -0
- package/dist/llm/openai/chat.js +94 -44
- package/dist/llm/openai/like.d.ts +5 -3
- package/dist/llm/openai/like.js +7 -3
- package/dist/llm/openai/responses-serializer.d.ts +18 -0
- package/dist/llm/openai/responses-serializer.js +72 -0
- package/dist/llm/openrouter/chat.d.ts +28 -2
- package/dist/llm/openrouter/chat.js +115 -29
- package/dist/llm/schema.d.ts +11 -1
- package/dist/llm/schema.js +109 -4
- package/dist/llm/vercel/chat.d.ts +50 -0
- package/dist/llm/vercel/chat.js +276 -0
- package/dist/llm/vercel/index.d.ts +1 -0
- package/dist/llm/vercel/index.js +1 -0
- package/dist/llm/vercel/serializer.d.ts +5 -0
- package/dist/llm/vercel/serializer.js +7 -0
- package/dist/llm/views.d.ts +2 -1
- package/dist/llm/views.js +3 -1
- package/dist/logging-config.d.ts +2 -0
- package/dist/logging-config.js +82 -29
- package/dist/mcp/client.d.ts +10 -5
- package/dist/mcp/client.js +14 -9
- package/dist/mcp/controller.d.ts +42 -3
- package/dist/mcp/controller.js +56 -31
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.js +261 -52
- package/dist/observability.js +10 -4
- package/dist/sandbox/index.d.ts +2 -0
- package/dist/sandbox/index.js +2 -0
- package/dist/sandbox/sandbox.d.ts +19 -0
- package/dist/sandbox/sandbox.js +140 -0
- package/dist/sandbox/views.d.ts +67 -0
- package/dist/sandbox/views.js +121 -0
- package/dist/skill-cli/index.d.ts +3 -0
- package/dist/skill-cli/index.js +3 -0
- package/dist/skill-cli/protocol.d.ts +30 -0
- package/dist/skill-cli/protocol.js +48 -0
- package/dist/skill-cli/server.d.ts +11 -0
- package/dist/skill-cli/server.js +85 -0
- package/dist/skill-cli/sessions.d.ts +24 -0
- package/dist/skill-cli/sessions.js +47 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.js +3 -0
- package/dist/skills/service.d.ts +27 -0
- package/dist/skills/service.js +266 -0
- package/dist/skills/utils.d.ts +6 -0
- package/dist/skills/utils.js +53 -0
- package/dist/skills/views.d.ts +40 -0
- package/dist/skills/views.js +10 -0
- package/dist/sync/auth.js +8 -3
- package/dist/sync/service.d.ts +6 -6
- package/dist/sync/service.js +54 -89
- package/dist/telemetry/views.d.ts +20 -6
- package/dist/telemetry/views.js +23 -5
- package/dist/tokens/custom-pricing.d.ts +2 -0
- package/dist/tokens/custom-pricing.js +22 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/mappings.d.ts +1 -0
- package/dist/tokens/mappings.js +3 -0
- package/dist/tokens/service.js +27 -8
- package/dist/tools/extraction/index.d.ts +2 -0
- package/dist/tools/extraction/index.js +2 -0
- package/dist/tools/extraction/schema-utils.d.ts +6 -0
- package/dist/tools/extraction/schema-utils.js +237 -0
- package/dist/tools/extraction/views.d.ts +7 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/registry/index.d.ts +2 -0
- package/dist/tools/registry/index.js +2 -0
- package/dist/tools/registry/service.d.ts +1 -0
- package/dist/tools/registry/service.js +1 -0
- package/dist/tools/registry/views.d.ts +1 -0
- package/dist/tools/registry/views.js +1 -0
- package/dist/tools/service.d.ts +2 -0
- package/dist/tools/service.js +1 -0
- package/dist/tools/utils.d.ts +2 -0
- package/dist/tools/utils.js +57 -0
- package/dist/tools/views.d.ts +1 -0
- package/dist/tools/views.js +1 -0
- package/dist/utils.d.ts +10 -1
- package/dist/utils.js +70 -3
- package/package.json +116 -49
- package/dist/dom/playground/process-dom.js +0 -5
- package/dist/dom/playground/test-accessibility.d.ts +0 -44
- package/dist/dom/playground/test-accessibility.js +0 -111
- /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { ContentPartImageParam, ContentPartTextParam, ImageURL, SystemMessage, UserMessage, } from '../llm/messages.js';
|
|
3
|
+
/**
 * Shortens `text` so it fits a character budget, inserting a visible marker
 * where content was removed.
 *
 * - Text that already fits (`text.length <= maxLength`) is returned unchanged.
 * - With `fromBeginning = false` (default) the head of the text is kept and
 *   `...[text truncated]...` is appended.
 * - With `fromBeginning = true` the tail of the text is kept and
 *   `...[text truncated]` is prepended.
 *
 * When `maxLength` is too small to hold the marker plus any content, only the
 * marker is returned.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Character budget for the result.
 * @param {boolean} [fromBeginning=false] - Drop the beginning instead of the end.
 * @returns {string} The original text, or a shortened copy carrying a truncation marker.
 */
const truncateText = (text, maxLength, fromBeginning = false) => {
    if (text.length <= maxLength) {
        return text;
    }
    if (fromBeginning) {
        // Clamp at zero: a negative budget would flip slice() into keeping the
        // wrong part, and slice(-0) would return the entire string.
        const tailLength = Math.max(0, maxLength - 20);
        const tail = tailLength > 0 ? text.slice(-tailLength) : '';
        return `...[text truncated]${tail}`;
    }
    // Clamp at zero so a tiny budget cannot turn into a negative end index,
    // which would keep almost the whole text instead of truncating it.
    const headLength = Math.max(0, maxLength - 23);
    return `${text.slice(0, headLength)}...[text truncated]...`;
};
|
|
12
|
+
/**
 * Reads a file from disk and returns its contents as a base64 string.
 *
 * Any failure to read (missing file, path is a directory, permission error,
 * invalid path value) yields `null` instead of throwing, so callers can skip
 * screenshots that are unavailable.
 *
 * @param {string} imagePath - Path of the image file to encode.
 * @returns {string | null} Base64-encoded file contents, or `null` if the file could not be read.
 */
const encodeImage = (imagePath) => {
    try {
        // Read directly instead of checking existence first: the read already
        // fails for a missing file, and a separate check can go stale before
        // the read happens.
        return fs.readFileSync(imagePath).toString('base64');
    }
    catch {
        return null;
    }
};
|
|
23
|
+
/**
 * Builds the message pair sent to the judge model that evaluates a finished
 * agent run.
 *
 * The system message carries the evaluation framework (optionally prefixed by
 * a ground-truth section) and the required JSON response format. The user
 * message carries the task, the optional ground truth, the agent trajectory,
 * the final result, and any screenshots that could be read from disk.
 *
 * @param {object} options
 * @param {string} options.task - The task given to the agent (truncated to 40000 chars).
 * @param {string} options.final_result - The agent's final answer (truncated to 40000 chars).
 * @param {string[]} options.agent_steps - Step descriptions, joined with newlines and truncated to 40000 chars.
 * @param {string[]} options.screenshot_paths - Screenshot file paths; the most recent `max_images` are attached.
 * @param {number} [options.max_images=10] - Maximum number of screenshots to attach; `0` attaches none.
 * @param {string | null} [options.ground_truth=null] - Verified expected outcome, if any.
 * @param {boolean} [options.use_vision=true] - When `false`, no screenshots are attached.
 * @returns {Array} `[SystemMessage, UserMessage]`; the user message content is one text part followed by the image parts.
 */
export const construct_judge_messages = (options) => {
    const { task, final_result, agent_steps, screenshot_paths, max_images = 10, ground_truth = null, use_vision = true, } = options;
    // Missing values fall back to empty text so the "No ... provided"
    // placeholders in the prompt are used instead of throwing on `.length`.
    const task_truncated = truncateText(task ?? '', 40000);
    const final_result_truncated = truncateText(final_result ?? '', 40000);
    const steps_text_truncated = truncateText((agent_steps ?? []).join('\n'), 40000);
    const encoded_images = [];
    if (use_vision !== false) {
        const available_paths = screenshot_paths ?? [];
        // A finite limit is clamped to a non-negative integer; anything else
        // (null, NaN, Infinity) keeps every screenshot.
        const image_limit = Number.isFinite(max_images)
            ? Math.max(0, Math.trunc(max_images))
            : available_paths.length;
        // slice(-0) would return the whole array, so a zero limit is handled
        // explicitly; otherwise keep only the most recent screenshots.
        const selected = image_limit === 0 ? [] : available_paths.slice(-image_limit);
        for (const screenshotPath of selected) {
            const encoded = encodeImage(screenshotPath);
            if (!encoded) {
                // Unreadable or empty screenshot: skip it rather than fail the evaluation.
                continue;
            }
            encoded_images.push(new ContentPartImageParam(new ImageURL(`data:image/png;base64,${encoded}`, 'auto', 'image/png')));
        }
    }
    // Extra instructions are only added to the system prompt when a ground truth exists.
    const ground_truth_section = ground_truth
        ? `
**GROUND TRUTH VALIDATION (HIGHEST PRIORITY):**
The <ground_truth> section contains verified correct information for this task. This can be:
- Evaluation criteria: Specific conditions that must be met (e.g., "The success popup should show up", "Must extract exactly 5 items")
- Factual answers: The correct answer to a question or information retrieval task (e.g. "10/11/24", "Paris")
- Expected outcomes: What should happen after task completion (e.g., "Google Doc must be created", "File should be downloaded")

The ground truth takes ABSOLUTE precedence over all other evaluation criteria. If the ground truth is not satisfied by the agent's execution and final response, the verdict MUST be false.
`
        : '';
    const system_prompt = `You are an expert judge evaluating browser automation agent performance.

<evaluation_framework>
${ground_truth_section}
**PRIMARY EVALUATION CRITERIA (in order of importance):**
1. **Task Satisfaction (Most Important)**: Did the agent accomplish what the user asked for? Break down the task into the key criteria and evaluate if the agent all of them. Focus on user intent and final outcome.
2. **Output Quality**: Is the final result in the correct format and complete? Does it match exactly what was requested?
3. **Tool Effectiveness**: Did the browser interactions work as expected? Were tools used appropriately? How many % of the tools failed?
4. **Agent Reasoning**: Quality of decision-making, planning, and problem-solving throughout the trajectory.
5. **Browser Handling**: Navigation stability, error recovery, and technical execution. If the browser crashes, does not load or a captcha blocks the task, the score must be very low.

**VERDICT GUIDELINES:**
- true: Task completed as requested, human-like execution, all of the users criteria were met and the agent did not make up any information.
- false: Task not completed, or only partially completed.

**Examples of task completion verdict:**
- If task asks for 10 items and agent finds 4 items correctly: false
- If task completed to full user requirements but with some errors to improve in the trajectory: true
- If task impossible due to captcha/login requirements: false
- If the trajectory is ideal and the output is perfect: true
- If the task asks to search all headphones in amazon under $100 but the agent searches all headphones and the lowest price is $150: false
- If the task asks to research a property and create a google doc with the result but the agents only returns the results in text: false
- If the task asks to complete an action on the page, and the agent reports that the action is completed but the screenshot or page shows the action is not actually complete: false
- If the task asks to use a certain tool or site to complete the task but the agent completes the task without using it: false
- If the task asks to look for a section of a page that does not exist: false
- If the agent concludes the task is impossible but it is not: false
- If the agent concludes the task is impossible and it truly is impossible: false
- If the agent is unable to complete the task because no login information was provided and it is truly needed to complete the task: false

**FAILURE CONDITIONS (automatically set verdict to false):**
- Blocked by captcha or missing authentication
- Output format completely wrong or missing
- Infinite loops or severe technical failures
- Critical user requirements ignored
- Page not loaded
- Browser crashed
- Agent could not interact with required UI elements
- The agent moved on from a important step in the task without completing it
- The agent made up content that is not in the screenshot or the page state
- The agent calls done action before completing all key points of the task

**IMPOSSIBLE TASK DETECTION:**
Set impossible_task to true when the task fundamentally could not be completed due to:
- Vague or ambiguous task instructions that cannot be reasonably interpreted
- Website genuinely broken or non-functional (be conservative - temporary issues don't count)
- Required links/pages truly inaccessible (404, 403, etc.)
- Task requires authentication/login but no credentials were provided
- Task asks for functionality that doesn't exist on the target site
- Other insurmountable external obstacles beyond the agent's control

Do NOT mark as impossible if:
- Agent made poor decisions but task was achievable
- Temporary page loading issues that could be retried
- Agent didn't try the right approach
- Website works but agent struggled with it

**CAPTCHA DETECTION:**
Set reached_captcha to true if:
- Screenshots show captcha challenges (reCAPTCHA, hCaptcha, etc.)
- Agent reports being blocked by bot detection
- Error messages indicate captcha/verification requirements
- Any evidence the agent encountered anti-bot measures during execution

**IMPORTANT EVALUATION NOTES:**
- **evaluate for action** - For each key step of the trace, double check whether the action that the agent tried to performed actually happened. If the required action did not actually occur, the verdict should be false.
- **screenshot is not entire content** - The agent has the entire DOM content, but the screenshot is only part of the content. If the agent extracts information from the page, but you do not see it in the screenshot, you can assume this information is there.
- **Penalize poor tool usage** - Wrong tools, inefficient approaches, ignoring available information.
- **ignore unexpected dates and times** - These agent traces are from varying dates, you can assume the dates the agent uses for search or filtering are correct.
- **IMPORTANT**: be very picky about the user's request - Have very high standard for the agent completing the task exactly to the user's request.
- **IMPORTANT**: be initially doubtful of the agent's self reported success, be sure to verify that its methods are valid and fulfill the user's desires to a tee.
</evaluation_framework>

<response_format>
Respond with EXACTLY this JSON structure (no additional text before or after):

{
"reasoning": "Breakdown of user task into key points. Detailed analysis covering: what went well, what didn't work, trajectory quality assessment, tool usage evaluation, output quality review, and overall user satisfaction prediction.",
"verdict": true or false,
"failure_reason": "Max 5 sentences explanation of why the task was not completed successfully in case of failure. If verdict is true, use an empty string.",
"impossible_task": true or false,
"reached_captcha": true or false
}
</response_format>`;
    // The ground truth itself travels in the user message, next to the task.
    const ground_truth_prompt = ground_truth
        ? `
<ground_truth>
${ground_truth}
</ground_truth>
`
        : '';
    const user_prompt = `<task>
${task_truncated || 'No task provided'}
</task>
${ground_truth_prompt}
<agent_trajectory>
${steps_text_truncated || 'No agent trajectory provided'}
</agent_trajectory>

<final_result>
${final_result_truncated || 'No final result provided'}
</final_result>

${encoded_images.length} screenshots from execution are attached.

Evaluate this agent execution given the criteria and respond with the exact JSON structure requested.`;
    // Text first, then the screenshots in the order they were selected.
    const content_parts = [
        new ContentPartTextParam(user_prompt),
    ];
    content_parts.push(...encoded_images);
    return [new SystemMessage(system_prompt), new UserMessage(content_parts)];
};
|
|
163
|
+
/**
 * Builds the message pair for the lightweight verifier, which checks only the
 * task against the agent's final response (no trajectory, no screenshots).
 *
 * @param {object} options
 * @param {string} options.task - The task given to the agent (truncated to 20000 chars).
 * @param {string} options.final_result - The agent's final response (truncated to 20000 chars).
 * @param {string} [options.current_date] - Date shown to the verifier; defaults to today's date as `YYYY-MM-DD` (UTC, from `toISOString`).
 * @returns {Array} `[SystemMessage, UserMessage]` ready to send to the model.
 */
export const construct_simple_judge_messages = (options) => {
    // Missing values fall back to empty text so the "No ... provided"
    // placeholders below are used instead of throwing on `.length`.
    const task_truncated = truncateText(options.task ?? '', 20000);
    const final_result_truncated = truncateText(options.final_result ?? '', 20000);
    // The date is stated in the prompt so near-today dates in the response
    // are not judged as fabricated.
    const current_date = options.current_date ?? new Date().toISOString().slice(0, 10);
    const system_prompt = `You are a strict verifier checking whether a browser automation agent actually completed its task.

Today's date is ${current_date}. The agent ran recently - dates near today are expected and NOT fabricated.

Given the task and the agent's final response, determine if the response genuinely satisfies ALL requirements.

Check for these common failure patterns:
1. **Incorrect data**: Wrong number of items, missing filters/criteria, wrong format
2. **Unverified actions**: Agent claims to have submitted a form, posted a comment, or saved a file but there's no evidence
3. **Incomplete results**: Some requirements from the task are not addressed in the response
4. **Fabricated content**: Data that looks plausible but wasn't actually extracted from any page. NOTE: dates and times close to today's date (${current_date}) are NOT fabricated - the agent browses live websites and extracts real-time content.
5. **Partial completion reported as success**: Response acknowledges failure or blockers (captcha, access denied, etc.) but still claims success

Respond with EXACTLY this JSON structure:
{
"is_correct": true or false,
"reason": "Brief explanation if not correct, empty string if correct"
}

Be strict: if the response doesn't clearly satisfy every requirement, set is_correct to false.`;
    const user_prompt = `<task>
${task_truncated || 'No task provided'}
</task>

<agent_final_response>
${final_result_truncated || 'No response provided'}
</agent_final_response>

Does the agent's response fully satisfy all requirements of the task? Respond with the JSON structure.`;
    return [new SystemMessage(system_prompt), new UserMessage(user_prompt)];
};
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { SystemMessage, UserMessage, type Message } from '../../llm/messages.js';
|
|
2
|
-
import {
|
|
1
|
+
import { ContentPartImageParam, ContentPartTextParam, SystemMessage, UserMessage, type Message } from '../../llm/messages.js';
|
|
2
|
+
import type { BaseChatModel } from '../../llm/base.js';
|
|
3
|
+
import { ActionResult, AgentOutput, AgentStepInfo, MessageCompactionSettings } from '../views.js';
|
|
3
4
|
import { BrowserStateSummary } from '../../browser/views.js';
|
|
4
5
|
import { FileSystem } from '../../filesystem/file-system.js';
|
|
5
6
|
import { MessageManagerState } from './views.js';
|
|
@@ -11,20 +12,27 @@ export declare class MessageManager {
|
|
|
11
12
|
private readonly maxHistoryItems;
|
|
12
13
|
private readonly visionDetailLevel;
|
|
13
14
|
private readonly includeToolCallExamples;
|
|
15
|
+
private readonly includeRecentEvents;
|
|
16
|
+
private readonly sampleImages;
|
|
17
|
+
private readonly llmScreenshotSize;
|
|
14
18
|
private task;
|
|
15
19
|
private systemPrompt;
|
|
16
20
|
private sensitiveDataDescription;
|
|
17
21
|
private lastInputMessages;
|
|
18
22
|
private includeAttributes;
|
|
19
|
-
|
|
23
|
+
last_state_message_text: string | null;
|
|
24
|
+
constructor(task: string, systemMessage: SystemMessage, fileSystem: FileSystem, state?: MessageManagerState, useThinking?: boolean, includeAttributes?: string[] | null, sensitiveData?: Record<string, string | Record<string, string>> | undefined, maxHistoryItems?: number | null, visionDetailLevel?: 'auto' | 'low' | 'high', includeToolCallExamples?: boolean, includeRecentEvents?: boolean, sampleImages?: Array<ContentPartTextParam | ContentPartImageParam> | null, llmScreenshotSize?: [number, number] | null);
|
|
20
25
|
get agent_history_description(): string;
|
|
21
26
|
add_new_task(new_task: string): void;
|
|
22
27
|
private updateAgentHistoryDescription;
|
|
23
28
|
private getSensitiveDataDescription;
|
|
24
|
-
|
|
29
|
+
prepare_step_state(browser_state_summary: BrowserStateSummary, model_output?: AgentOutput | null, result?: ActionResult[] | null, step_info?: AgentStepInfo | null, sensitive_data?: Record<string, string | Record<string, string>> | null): void;
|
|
30
|
+
maybe_compact_messages(llm: BaseChatModel | null, settings: MessageCompactionSettings | null, step_info?: AgentStepInfo | null): Promise<boolean>;
|
|
31
|
+
create_state_messages(browser_state_summary: BrowserStateSummary, model_output?: AgentOutput | null, result?: ActionResult[] | null, step_info?: AgentStepInfo | null, use_vision?: boolean | 'auto', page_filtered_actions?: string | null, sensitive_data?: Record<string, string | Record<string, string>> | null, available_file_paths?: string[] | null, include_recent_events?: boolean | null, plan_description?: string | null, unavailable_skills_info?: string | null, skip_state_update?: boolean): void;
|
|
25
32
|
get_messages(): Message[];
|
|
26
33
|
private setMessageWithType;
|
|
27
34
|
private addContextMessage;
|
|
28
35
|
_add_context_message(message: SystemMessage | UserMessage): void;
|
|
36
|
+
private extractStateMessageText;
|
|
29
37
|
private filterSensitiveData;
|
|
30
38
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { ContentPartTextParam, } from '../../llm/messages.js';
|
|
1
|
+
import { ContentPartTextParam, SystemMessage, UserMessage, } from '../../llm/messages.js';
|
|
2
2
|
import { AgentMessagePrompt } from '../prompts.js';
|
|
3
3
|
import { MessageManagerState, HistoryItem } from './views.js';
|
|
4
4
|
import { match_url_with_domain_pattern } from '../../utils.js';
|
|
@@ -12,12 +12,16 @@ export class MessageManager {
|
|
|
12
12
|
maxHistoryItems;
|
|
13
13
|
visionDetailLevel;
|
|
14
14
|
includeToolCallExamples;
|
|
15
|
+
includeRecentEvents;
|
|
16
|
+
sampleImages;
|
|
17
|
+
llmScreenshotSize;
|
|
15
18
|
task;
|
|
16
19
|
systemPrompt;
|
|
17
20
|
sensitiveDataDescription = '';
|
|
18
21
|
lastInputMessages = [];
|
|
19
22
|
includeAttributes;
|
|
20
|
-
|
|
23
|
+
last_state_message_text = null;
|
|
24
|
+
constructor(task, systemMessage, fileSystem, state = new MessageManagerState(), useThinking = true, includeAttributes = null, sensitiveData, maxHistoryItems = null, visionDetailLevel = 'auto', includeToolCallExamples = false, includeRecentEvents = false, sampleImages = null, llmScreenshotSize = null) {
|
|
21
25
|
this.fileSystem = fileSystem;
|
|
22
26
|
this.state = state;
|
|
23
27
|
this.useThinking = useThinking;
|
|
@@ -25,6 +29,12 @@ export class MessageManager {
|
|
|
25
29
|
this.maxHistoryItems = maxHistoryItems;
|
|
26
30
|
this.visionDetailLevel = visionDetailLevel;
|
|
27
31
|
this.includeToolCallExamples = includeToolCallExamples;
|
|
32
|
+
this.includeRecentEvents = includeRecentEvents;
|
|
33
|
+
this.sampleImages = sampleImages;
|
|
34
|
+
this.llmScreenshotSize = llmScreenshotSize;
|
|
35
|
+
if (this.maxHistoryItems != null && this.maxHistoryItems <= 5) {
|
|
36
|
+
throw new Error('max_history_items must be null or greater than 5');
|
|
37
|
+
}
|
|
28
38
|
this.task = task;
|
|
29
39
|
this.systemPrompt = systemMessage;
|
|
30
40
|
this.includeAttributes = includeAttributes ?? [];
|
|
@@ -33,16 +43,21 @@ export class MessageManager {
|
|
|
33
43
|
}
|
|
34
44
|
}
|
|
35
45
|
get agent_history_description() {
|
|
46
|
+
const compactedPrefix = this.state.compacted_memory
|
|
47
|
+
? `<compacted_memory>\n${this.state.compacted_memory}\n</compacted_memory>\n`
|
|
48
|
+
: '';
|
|
36
49
|
if (this.maxHistoryItems == null) {
|
|
37
|
-
return
|
|
38
|
-
.
|
|
39
|
-
|
|
50
|
+
return (compactedPrefix +
|
|
51
|
+
this.state.agent_history_items
|
|
52
|
+
.map((item) => item.to_string())
|
|
53
|
+
.join('\n'));
|
|
40
54
|
}
|
|
41
55
|
const totalItems = this.state.agent_history_items.length;
|
|
42
56
|
if (totalItems <= this.maxHistoryItems) {
|
|
43
|
-
return
|
|
44
|
-
.
|
|
45
|
-
|
|
57
|
+
return (compactedPrefix +
|
|
58
|
+
this.state.agent_history_items
|
|
59
|
+
.map((item) => item.to_string())
|
|
60
|
+
.join('\n'));
|
|
46
61
|
}
|
|
47
62
|
const omitted = totalItems - this.maxHistoryItems;
|
|
48
63
|
const keepRecent = this.maxHistoryItems - 1;
|
|
@@ -52,53 +67,80 @@ export class MessageManager {
|
|
|
52
67
|
parts.push(...this.state.agent_history_items
|
|
53
68
|
.slice(-keepRecent)
|
|
54
69
|
.map((item) => item.to_string()));
|
|
55
|
-
return parts.join('\n');
|
|
70
|
+
return compactedPrefix + parts.join('\n');
|
|
56
71
|
}
|
|
57
72
|
add_new_task(new_task) {
|
|
58
|
-
|
|
59
|
-
this.
|
|
73
|
+
const normalizedTask = `<follow_up_user_request> ${new_task.trim()} </follow_up_user_request>`;
|
|
74
|
+
if (!this.task.includes('<initial_user_request>')) {
|
|
75
|
+
this.task = `<initial_user_request>${this.task}</initial_user_request>`;
|
|
76
|
+
}
|
|
77
|
+
this.task += `\n${normalizedTask}`;
|
|
78
|
+
this.state.agent_history_items.push(new HistoryItem(null, null, null, null, null, null, normalizedTask));
|
|
60
79
|
}
|
|
61
80
|
updateAgentHistoryDescription(model_output, result, step_info) {
|
|
62
81
|
const results = result ?? [];
|
|
63
82
|
const stepNumber = step_info?.step_number ?? null;
|
|
64
83
|
this.state.read_state_description = '';
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
84
|
+
this.state.read_state_images = [];
|
|
85
|
+
let actionResults = '';
|
|
86
|
+
let readStateIndex = 0;
|
|
87
|
+
results.forEach((action) => {
|
|
68
88
|
if (action.include_extracted_content_only_once &&
|
|
69
89
|
action.extracted_content) {
|
|
70
|
-
this.state.read_state_description +=
|
|
90
|
+
this.state.read_state_description += `<read_state_${readStateIndex}>\n${action.extracted_content}\n</read_state_${readStateIndex}>\n`;
|
|
91
|
+
readStateIndex += 1;
|
|
92
|
+
}
|
|
93
|
+
if (Array.isArray(action.images) && action.images.length > 0) {
|
|
94
|
+
this.state.read_state_images.push(...action.images);
|
|
71
95
|
}
|
|
72
96
|
if (action.long_term_memory) {
|
|
73
|
-
|
|
97
|
+
actionResults += `${action.long_term_memory}\n`;
|
|
74
98
|
}
|
|
75
99
|
else if (action.extracted_content &&
|
|
76
100
|
!action.include_extracted_content_only_once) {
|
|
77
|
-
|
|
101
|
+
actionResults += `${action.extracted_content}\n`;
|
|
78
102
|
}
|
|
79
103
|
if (action.error) {
|
|
80
104
|
const err = action.error.length > 200
|
|
81
105
|
? `${action.error.slice(0, 100)}......${action.error.slice(-100)}`
|
|
82
106
|
: action.error;
|
|
83
|
-
|
|
107
|
+
actionResults += `${err}\n`;
|
|
84
108
|
}
|
|
85
109
|
});
|
|
86
|
-
const
|
|
110
|
+
const MAX_CONTENT_SIZE = 60000;
|
|
111
|
+
if (this.state.read_state_description.length > MAX_CONTENT_SIZE) {
|
|
112
|
+
this.state.read_state_description = `${this.state.read_state_description.slice(0, MAX_CONTENT_SIZE)}\n... [Content truncated at 60k characters]`;
|
|
113
|
+
}
|
|
114
|
+
this.state.read_state_description =
|
|
115
|
+
this.state.read_state_description.trim();
|
|
116
|
+
let normalizedActionResults = actionResults
|
|
117
|
+
? `Result\n${actionResults}`.trim()
|
|
118
|
+
: null;
|
|
119
|
+
if (normalizedActionResults) {
|
|
120
|
+
if (normalizedActionResults.length > MAX_CONTENT_SIZE) {
|
|
121
|
+
normalizedActionResults = `${normalizedActionResults.slice(0, MAX_CONTENT_SIZE)}\n... [Content truncated at 60k characters]`;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
87
124
|
if (!model_output) {
|
|
88
|
-
if (stepNumber != null
|
|
89
|
-
|
|
125
|
+
if (stepNumber != null) {
|
|
126
|
+
if (stepNumber === 0 && normalizedActionResults) {
|
|
127
|
+
this.state.agent_history_items.push(new HistoryItem(stepNumber, null, null, null, normalizedActionResults, null, null));
|
|
128
|
+
}
|
|
129
|
+
else if (stepNumber > 0) {
|
|
130
|
+
this.state.agent_history_items.push(new HistoryItem(stepNumber, null, null, null, null, 'Agent failed to output in the right format.', null));
|
|
131
|
+
}
|
|
90
132
|
}
|
|
91
133
|
return;
|
|
92
134
|
}
|
|
93
135
|
const brain = model_output.current_state;
|
|
94
|
-
this.state.agent_history_items.push(new HistoryItem(stepNumber, brain.evaluation_previous_goal, brain.memory, brain.next_goal,
|
|
136
|
+
this.state.agent_history_items.push(new HistoryItem(stepNumber, brain.evaluation_previous_goal, brain.memory, brain.next_goal, normalizedActionResults, null, null));
|
|
95
137
|
}
|
|
96
|
-
getSensitiveDataDescription(currentUrl) {
|
|
138
|
+
getSensitiveDataDescription(currentUrl, sensitiveData = this.sensitiveData) {
|
|
97
139
|
const placeholders = new Set();
|
|
98
|
-
if (!
|
|
140
|
+
if (!sensitiveData) {
|
|
99
141
|
return '';
|
|
100
142
|
}
|
|
101
|
-
for (const [key, value] of Object.entries(
|
|
143
|
+
for (const [key, value] of Object.entries(sensitiveData)) {
|
|
102
144
|
if (value && typeof value === 'object' && !Array.isArray(value)) {
|
|
103
145
|
if (currentUrl &&
|
|
104
146
|
match_url_with_domain_pattern(currentUrl, key, true)) {
|
|
@@ -112,18 +154,121 @@ export class MessageManager {
|
|
|
112
154
|
if (!placeholders.size) {
|
|
113
155
|
return '';
|
|
114
156
|
}
|
|
115
|
-
|
|
157
|
+
const placeholderList = `[${Array.from(placeholders)
|
|
158
|
+
.sort()
|
|
159
|
+
.map((placeholder) => `'${placeholder.replaceAll("'", "\\'")}'`)
|
|
160
|
+
.join(', ')}]`;
|
|
161
|
+
return `Here are placeholders for sensitive data:\n${placeholderList}\nTo use them, write <secret>the placeholder name</secret>`;
|
|
116
162
|
}
|
|
117
|
-
|
|
163
|
+
prepare_step_state(browser_state_summary, model_output = null, result = null, step_info = null, sensitive_data = null) {
|
|
118
164
|
this.state.history.context_messages = [];
|
|
119
165
|
this.updateAgentHistoryDescription(model_output, result, step_info);
|
|
120
|
-
|
|
121
|
-
|
|
166
|
+
const effectiveSensitiveData = sensitive_data ?? this.sensitiveData;
|
|
167
|
+
if (effectiveSensitiveData) {
|
|
168
|
+
this.sensitiveDataDescription = this.getSensitiveDataDescription(browser_state_summary.url, effectiveSensitiveData);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
async maybe_compact_messages(llm, settings, step_info = null) {
|
|
172
|
+
if (!settings || !settings.enabled || !llm || !step_info) {
|
|
173
|
+
return false;
|
|
174
|
+
}
|
|
175
|
+
const stepsSince = step_info.step_number - (this.state.last_compaction_step ?? 0);
|
|
176
|
+
if (stepsSince < settings.compact_every_n_steps) {
|
|
177
|
+
return false;
|
|
178
|
+
}
|
|
179
|
+
const historyItems = this.state.agent_history_items;
|
|
180
|
+
const fullHistoryText = historyItems
|
|
181
|
+
.map((item) => item.to_string())
|
|
182
|
+
.join('\n')
|
|
183
|
+
.trim();
|
|
184
|
+
const triggerCharCount = settings.trigger_char_count ?? 40000;
|
|
185
|
+
if (fullHistoryText.length < triggerCharCount) {
|
|
186
|
+
return false;
|
|
187
|
+
}
|
|
188
|
+
logger.debug(`Compacting message history (items=${historyItems.length}, chars=${fullHistoryText.length})`);
|
|
189
|
+
const compactionSections = [];
|
|
190
|
+
if (this.state.compacted_memory) {
|
|
191
|
+
compactionSections.push(`<previous_compacted_memory>\n${this.state.compacted_memory}\n</previous_compacted_memory>`);
|
|
192
|
+
}
|
|
193
|
+
compactionSections.push(`<agent_history>\n${fullHistoryText}\n</agent_history>`);
|
|
194
|
+
if (settings.include_read_state && this.state.read_state_description) {
|
|
195
|
+
compactionSections.push(`<read_state>\n${this.state.read_state_description}\n</read_state>`);
|
|
196
|
+
}
|
|
197
|
+
let compactionInput = compactionSections.join('\n\n');
|
|
198
|
+
if (this.sensitiveData) {
|
|
199
|
+
const filtered = this.filterSensitiveData(new UserMessage(compactionInput));
|
|
200
|
+
compactionInput = filtered.text;
|
|
201
|
+
}
|
|
202
|
+
let systemPrompt = 'You are summarizing an agent run for prompt compaction.\n' +
|
|
203
|
+
'Capture task requirements, key facts, decisions, partial progress, errors, and next steps.\n' +
|
|
204
|
+
'Preserve important entities, values, URLs, and file paths.\n' +
|
|
205
|
+
'Return plain text only. Do not include tool calls or JSON.';
|
|
206
|
+
if (settings.summary_max_chars) {
|
|
207
|
+
systemPrompt += ` Keep under ${settings.summary_max_chars} characters if possible.`;
|
|
208
|
+
}
|
|
209
|
+
let summary;
|
|
210
|
+
try {
|
|
211
|
+
const response = await llm.ainvoke([
|
|
212
|
+
new SystemMessage(systemPrompt),
|
|
213
|
+
new UserMessage(compactionInput),
|
|
214
|
+
]);
|
|
215
|
+
summary = String(response?.completion ?? '').trim();
|
|
216
|
+
}
|
|
217
|
+
catch (error) {
|
|
218
|
+
logger.warning(`Failed to compact messages: ${error instanceof Error ? error.message : String(error)}`);
|
|
219
|
+
return false;
|
|
220
|
+
}
|
|
221
|
+
if (!summary) {
|
|
222
|
+
return false;
|
|
223
|
+
}
|
|
224
|
+
if (settings.summary_max_chars &&
|
|
225
|
+
summary.length > settings.summary_max_chars) {
|
|
226
|
+
summary = `${summary.slice(0, settings.summary_max_chars).trimEnd()}…`;
|
|
227
|
+
}
|
|
228
|
+
this.state.compacted_memory = summary;
|
|
229
|
+
this.state.compaction_count += 1;
|
|
230
|
+
this.state.last_compaction_step = step_info.step_number;
|
|
231
|
+
const keepLast = Math.max(0, settings.keep_last_items);
|
|
232
|
+
if (historyItems.length > keepLast + 1) {
|
|
233
|
+
if (keepLast === 0) {
|
|
234
|
+
this.state.agent_history_items = [historyItems[0]];
|
|
235
|
+
}
|
|
236
|
+
else {
|
|
237
|
+
this.state.agent_history_items = [
|
|
238
|
+
historyItems[0],
|
|
239
|
+
...historyItems.slice(-keepLast),
|
|
240
|
+
];
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
logger.debug(`Compaction complete (summary_chars=${summary.length}, history_items=${this.state.agent_history_items.length})`);
|
|
244
|
+
return true;
|
|
245
|
+
}
|
|
246
|
+
create_state_messages(browser_state_summary, model_output = null, result = null, step_info = null, use_vision = true, page_filtered_actions = null, sensitive_data = null, available_file_paths = null, include_recent_events = null, plan_description = null, unavailable_skills_info = null, skip_state_update = false) {
|
|
247
|
+
if (!skip_state_update) {
|
|
248
|
+
this.prepare_step_state(browser_state_summary, model_output, result, step_info, sensitive_data);
|
|
122
249
|
}
|
|
123
250
|
const screenshots = [];
|
|
124
|
-
|
|
251
|
+
let includeScreenshotRequested = false;
|
|
252
|
+
if (result) {
|
|
253
|
+
for (const actionResult of result) {
|
|
254
|
+
if (actionResult.metadata?.include_screenshot) {
|
|
255
|
+
includeScreenshotRequested = true;
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
let includeScreenshot = false;
|
|
261
|
+
if (use_vision === true) {
|
|
262
|
+
includeScreenshot = true;
|
|
263
|
+
}
|
|
264
|
+
else if (use_vision === 'auto') {
|
|
265
|
+
includeScreenshot = includeScreenshotRequested;
|
|
266
|
+
}
|
|
267
|
+
if (includeScreenshot && browser_state_summary.screenshot) {
|
|
125
268
|
screenshots.push(browser_state_summary.screenshot);
|
|
126
269
|
}
|
|
270
|
+
const effectiveUseVision = screenshots.length > 0;
|
|
271
|
+
const includeRecentEvents = include_recent_events ?? this.includeRecentEvents;
|
|
127
272
|
const prompt = new AgentMessagePrompt({
|
|
128
273
|
browser_state_summary,
|
|
129
274
|
file_system: this.fileSystem,
|
|
@@ -137,8 +282,15 @@ export class MessageManager {
|
|
|
137
282
|
available_file_paths,
|
|
138
283
|
screenshots,
|
|
139
284
|
vision_detail_level: this.visionDetailLevel,
|
|
285
|
+
include_recent_events: includeRecentEvents,
|
|
286
|
+
sample_images: this.sampleImages,
|
|
287
|
+
read_state_images: this.state.read_state_images,
|
|
288
|
+
llm_screenshot_size: this.llmScreenshotSize,
|
|
289
|
+
plan_description,
|
|
290
|
+
unavailable_skills_info,
|
|
140
291
|
});
|
|
141
|
-
const message = prompt.get_user_message(
|
|
292
|
+
const message = prompt.get_user_message(effectiveUseVision);
|
|
293
|
+
this.last_state_message_text = this.extractStateMessageText(message);
|
|
142
294
|
this.setMessageWithType(message, 'state');
|
|
143
295
|
}
|
|
144
296
|
get_messages() {
|
|
@@ -147,25 +299,39 @@ export class MessageManager {
|
|
|
147
299
|
return this.lastInputMessages;
|
|
148
300
|
}
|
|
149
301
|
setMessageWithType(message, messageType) {
|
|
150
|
-
const filtered = this.sensitiveData
|
|
151
|
-
? this.filterSensitiveData(message)
|
|
152
|
-
: message;
|
|
153
302
|
if (messageType === 'system') {
|
|
154
|
-
this.state.history.system_message =
|
|
303
|
+
this.state.history.system_message = message;
|
|
155
304
|
}
|
|
156
305
|
else {
|
|
306
|
+
const filtered = this.sensitiveData
|
|
307
|
+
? this.filterSensitiveData(message)
|
|
308
|
+
: message;
|
|
157
309
|
this.state.history.state_message = filtered;
|
|
158
310
|
}
|
|
159
311
|
}
|
|
160
312
|
addContextMessage(message) {
|
|
161
|
-
|
|
162
|
-
? this.filterSensitiveData(message)
|
|
163
|
-
: message;
|
|
164
|
-
this.state.history.context_messages.push(filtered);
|
|
313
|
+
this.state.history.context_messages.push(message);
|
|
165
314
|
}
|
|
166
315
|
_add_context_message(message) {
|
|
167
316
|
this.addContextMessage(message);
|
|
168
317
|
}
|
|
318
|
+
extractStateMessageText(message) {
|
|
319
|
+
if (typeof message.content === 'string') {
|
|
320
|
+
return message.content;
|
|
321
|
+
}
|
|
322
|
+
if (!Array.isArray(message.content)) {
|
|
323
|
+
return null;
|
|
324
|
+
}
|
|
325
|
+
return message.content
|
|
326
|
+
.map((part) => {
|
|
327
|
+
if (part instanceof ContentPartTextParam) {
|
|
328
|
+
return part.text;
|
|
329
|
+
}
|
|
330
|
+
return null;
|
|
331
|
+
})
|
|
332
|
+
.filter((part) => typeof part === 'string')
|
|
333
|
+
.join('\n');
|
|
334
|
+
}
|
|
169
335
|
filterSensitiveData(message) {
|
|
170
336
|
if (!this.sensitiveData) {
|
|
171
337
|
return message;
|
|
@@ -21,6 +21,10 @@ export declare class MessageManagerState {
|
|
|
21
21
|
tool_id: number;
|
|
22
22
|
agent_history_items: HistoryItem[];
|
|
23
23
|
read_state_description: string;
|
|
24
|
+
read_state_images: Array<Record<string, unknown>>;
|
|
25
|
+
compacted_memory: string | null;
|
|
26
|
+
compaction_count: number;
|
|
27
|
+
last_compaction_step: number | null;
|
|
24
28
|
get historyMessages(): Message[];
|
|
25
29
|
get_messages(): Message[];
|
|
26
30
|
}
|
|
@@ -19,28 +19,28 @@ export class HistoryItem {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
to_string() {
|
|
22
|
-
const stepStr = this.step_number != null ?
|
|
22
|
+
const stepStr = this.step_number != null ? 'step' : 'step_unknown';
|
|
23
23
|
if (this.error) {
|
|
24
|
-
return `<${stepStr}>\n${this.error}
|
|
24
|
+
return `<${stepStr}>\n${this.error}`;
|
|
25
25
|
}
|
|
26
26
|
if (this.system_message) {
|
|
27
|
-
return
|
|
27
|
+
return this.system_message;
|
|
28
28
|
}
|
|
29
29
|
const parts = [];
|
|
30
30
|
if (this.evaluation_previous_goal) {
|
|
31
|
-
parts.push(
|
|
31
|
+
parts.push(`${this.evaluation_previous_goal}`);
|
|
32
32
|
}
|
|
33
33
|
if (this.memory) {
|
|
34
|
-
parts.push(
|
|
34
|
+
parts.push(`${this.memory}`);
|
|
35
35
|
}
|
|
36
36
|
if (this.next_goal) {
|
|
37
|
-
parts.push(
|
|
37
|
+
parts.push(`${this.next_goal}`);
|
|
38
38
|
}
|
|
39
39
|
if (this.action_results) {
|
|
40
40
|
parts.push(this.action_results);
|
|
41
41
|
}
|
|
42
42
|
const content = parts.join('\n');
|
|
43
|
-
return `<${stepStr}>\n${content}
|
|
43
|
+
return `<${stepStr}>\n${content}`;
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
export class MessageHistory {
|
|
@@ -64,6 +64,10 @@ export class MessageManagerState {
|
|
|
64
64
|
new HistoryItem(0, null, null, null, null, null, 'Agent initialized'),
|
|
65
65
|
];
|
|
66
66
|
read_state_description = '';
|
|
67
|
+
read_state_images = [];
|
|
68
|
+
compacted_memory = null;
|
|
69
|
+
compaction_count = 0;
|
|
70
|
+
last_compaction_step = null;
|
|
67
71
|
get historyMessages() {
|
|
68
72
|
return this.history.get_messages();
|
|
69
73
|
}
|