optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Judge system for evaluating browser-use agent execution traces."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from browser_use.llm.messages import (
|
|
8
|
+
BaseMessage,
|
|
9
|
+
ContentPartImageParam,
|
|
10
|
+
ContentPartTextParam,
|
|
11
|
+
ImageURL,
|
|
12
|
+
SystemMessage,
|
|
13
|
+
UserMessage,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _encode_image(image_path: str) -> str | None:
|
|
20
|
+
"""Encode image to base64 string."""
|
|
21
|
+
try:
|
|
22
|
+
path = Path(image_path)
|
|
23
|
+
if not path.exists():
|
|
24
|
+
return None
|
|
25
|
+
with open(path, 'rb') as f:
|
|
26
|
+
return base64.b64encode(f.read()).decode('utf-8')
|
|
27
|
+
except Exception as e:
|
|
28
|
+
logger.warning(f'Failed to encode image {image_path}: {e}')
|
|
29
|
+
return None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _truncate_text(text: str, max_length: int, from_beginning: bool = False) -> str:
|
|
33
|
+
"""Truncate text to maximum length with eval system indicator."""
|
|
34
|
+
if len(text) <= max_length:
|
|
35
|
+
return text
|
|
36
|
+
if from_beginning:
|
|
37
|
+
return '...[text truncated]' + text[-max_length + 23 :]
|
|
38
|
+
else:
|
|
39
|
+
return text[: max_length - 23] + '...[text truncated]...'
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def construct_judge_messages(
	task: str,
	final_result: str,
	agent_steps: list[str],
	screenshot_paths: list[str],
	max_images: int = 10,
) -> list[BaseMessage]:
	"""
	Construct messages for judge evaluation of agent trace.

	Builds a two-message conversation: a SystemMessage carrying the judge rubric
	and a UserMessage carrying the (truncated) task/trajectory/result text plus
	up to *max_images* screenshots as inline base64 image parts.

	Args:
		task: The original task description
		final_result: The final result returned to the user
		agent_steps: List of formatted agent step descriptions
		screenshot_paths: List of screenshot file paths
		max_images: Maximum number of screenshots to include

	Returns:
		List of messages for LLM judge evaluation
	"""
	# Each free-text section is capped at 40k characters so the combined prompt
	# stays within typical LLM context limits.
	task_truncated = _truncate_text(task, 40000)
	final_result_truncated = _truncate_text(final_result, 40000)
	steps_text = '\n'.join(agent_steps)
	steps_text_truncated = _truncate_text(steps_text, 40000)

	# Select last N screenshots — the end of the trace matters most for judging
	selected_screenshots = screenshot_paths[-max_images:] if len(screenshot_paths) > max_images else screenshot_paths

	# Encode screenshots; unreadable/missing files are silently skipped
	# (_encode_image returns None for those)
	encoded_images: list[ContentPartImageParam] = []
	for img_path in selected_screenshots:
		encoded = _encode_image(img_path)
		if encoded:
			encoded_images.append(
				ContentPartImageParam(
					image_url=ImageURL(
						url=f'data:image/png;base64,{encoded}',
						media_type='image/png',
					)
				)
			)

	# System prompt for judge.
	# NOTE(review): this is a plain string literal (never .format()-ed), so the
	# doubled braces around the JSON example are sent to the model literally as
	# '{{' / '}}' — confirm that is intended before "fixing" them.
	system_prompt = """You are an expert judge evaluating browser automation agent performance.

<evaluation_framework>
**PRIMARY EVALUATION CRITERIA (in order of importance):**
1. **Task Satisfaction (Most Important)**: Did the agent accomplish what the user asked for? Break down the task into the key criteria and evaluate if the agent all of them. Focus on user intent and final outcome.
2. **Output Quality**: Is the final result in the correct format and complete? Does it match exactly what was requested?
3. **Tool Effectiveness**: Did the browser interactions work as expected? Were tools used appropriately? How many % of the tools failed?
4. **Agent Reasoning**: Quality of decision-making, planning, and problem-solving throughout the trajectory.
5. **Browser Handling**: Navigation stability, error recovery, and technical execution. If the browser crashes, does not load or a captcha blocks the task, the score must be very low.

**VERDICT GUIDELINES:**
- true: Task completed as requested, human-like execution, all of the users criteria were met and the agent did not make up any information.
- false: Task not completed, or only partially completed.

**Examples of task completion verdict:**
- If task asks for 10 items and agent finds 4 items correctly: false
- If task completed to full user requirements but with some errors to improve in the trajectory: true
- If task impossible due to captcha/login requirements: false
- If the trajectory is ideal and the output is perfect: true
- If the task asks to search all headphones in amazon under $100 but the agent searches all headphones and the lowest price is $150: false
- If the task asks to research a property and create a google doc with the result but the agents only returns the results in text: false
- If the task asks to complete an action on the page, and the agent reports that the action is completed but the screenshot or page shows the action is not actually complete: false
- If the task asks to use a certain tool or site to complete the task but the agent completes the task without using it: false
- If the task asks to look for a section of a page that does not exist: false
- If the agent concludes the task is impossible but it is not: false
- If the agent concludes the task is impossible and it truly is impossible: false
- If the agent is unable to complete the task because no login information was provided and it is truly needed to complete the task: false

**FAILURE CONDITIONS (automatically set verdict to false):**
- Blocked by captcha or missing authentication
- Output format completely wrong or missing
- Infinite loops or severe technical failures
- Critical user requirements ignored
- Page not loaded
- Browser crashed
- Agent could not interact with required UI elements
- The agent moved on from a important step in the task without completing it
- The agent made up content that is not in the screenshot or the page state
- The agent calls done action before completing all key points of the task

**IMPORTANT EVALUATION NOTES:**
- **evaluate for action** - For each key step of the trace, double check whether the action that the agent tried to performed actually happened. If the required action did not actually occur, the verdict should be false.
- **screenshot is not entire content** - The agent has the entire DOM content, but the screenshot is only part of the content. If the agent extracts information from the page, but you do not see it in the screenshot, you can assume this information is there.
- **Penalize poor tool usage** - Wrong tools, inefficient approaches, ignoring available information.
- **ignore unexpected dates and times** - These agent traces are from varying dates, you can assume the dates the agent uses for search or filtering are correct.
- **IMPORTANT**: be very picky about the user's request - Have very high standard for the agent completing the task exactly to the user's request.
- **IMPORTANT**: be initially doubtful of the agent's self reported success, be sure to verify that its methods are valid and fulfill the user's desires to a tee.

</evaluation_framework>

<response_format>
Respond with EXACTLY this JSON structure (no additional text before or after):

{{
"reasoning": "Breakdown of user task into key points. Detailed analysis covering: what went well, what didn't work, trajectory quality assessment, tool usage evaluation, output quality review, and overall user satisfaction prediction",
"verdict": true or false,
"failure_reason": "If verdict is false, provide the key reason why the task was not completed successfully. If verdict is true, use an empty string."
}}
</response_format>
"""

	# User message: truncated trace sections in tagged blocks, then an image count
	user_prompt = f"""
<task>
{task_truncated or 'No task provided'}
</task>

<agent_trajectory>
{steps_text_truncated or 'No agent trajectory provided'}
</agent_trajectory>

<final_result>
{final_result_truncated or 'No final result provided'}
</final_result>

{len(encoded_images)} screenshots from execution are attached.

Evaluate this agent execution given the criteria and respond with the exact JSON structure requested."""

	# Build messages with screenshots: prompt text first, then the image parts
	content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=user_prompt)]
	content_parts.extend(encoded_images)

	return [
		SystemMessage(content=system_prompt),
		UserMessage(content=content_parts),
	]
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from browser_use.agent.message_manager.views import (
|
|
7
|
+
HistoryItem,
|
|
8
|
+
)
|
|
9
|
+
from browser_use.agent.prompts import AgentMessagePrompt
|
|
10
|
+
from browser_use.agent.views import (
|
|
11
|
+
ActionResult,
|
|
12
|
+
AgentOutput,
|
|
13
|
+
AgentStepInfo,
|
|
14
|
+
MessageManagerState,
|
|
15
|
+
)
|
|
16
|
+
from browser_use.browser.views import BrowserStateSummary
|
|
17
|
+
from browser_use.filesystem.file_system import FileSystem
|
|
18
|
+
from browser_use.llm.messages import (
|
|
19
|
+
BaseMessage,
|
|
20
|
+
ContentPartImageParam,
|
|
21
|
+
ContentPartTextParam,
|
|
22
|
+
SystemMessage,
|
|
23
|
+
)
|
|
24
|
+
from browser_use.observability import observe_debug
|
|
25
|
+
from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ========== Logging Helper Functions ==========
|
|
31
|
+
# These functions are used ONLY for formatting debug log output.
|
|
32
|
+
# They do NOT affect the actual message content sent to the LLM.
|
|
33
|
+
# All logging functions start with _log_ for easy identification.
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _log_get_message_emoji(message: BaseMessage) -> str:
|
|
37
|
+
"""Get emoji for a message type - used only for logging display"""
|
|
38
|
+
emoji_map = {
|
|
39
|
+
'UserMessage': '💬',
|
|
40
|
+
'SystemMessage': '🧠',
|
|
41
|
+
'AssistantMessage': '🔨',
|
|
42
|
+
}
|
|
43
|
+
return emoji_map.get(message.__class__.__name__, '🎮')
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _log_format_message_line(message: BaseMessage, content: str, is_last_message: bool, terminal_width: int) -> list[str]:
|
|
47
|
+
"""Format a single message for logging display"""
|
|
48
|
+
try:
|
|
49
|
+
lines = []
|
|
50
|
+
|
|
51
|
+
# Get emoji and token info
|
|
52
|
+
emoji = _log_get_message_emoji(message)
|
|
53
|
+
# token_str = str(message.metadata.tokens).rjust(4)
|
|
54
|
+
# TODO: fix the token count
|
|
55
|
+
token_str = '??? (TODO)'
|
|
56
|
+
prefix = f'{emoji}[{token_str}]: '
|
|
57
|
+
|
|
58
|
+
# Calculate available width (emoji=2 visual cols + [token]: =8 chars)
|
|
59
|
+
content_width = terminal_width - 10
|
|
60
|
+
|
|
61
|
+
# Handle last message wrapping
|
|
62
|
+
if is_last_message and len(content) > content_width:
|
|
63
|
+
# Find a good break point
|
|
64
|
+
break_point = content.rfind(' ', 0, content_width)
|
|
65
|
+
if break_point > content_width * 0.7: # Keep at least 70% of line
|
|
66
|
+
first_line = content[:break_point]
|
|
67
|
+
rest = content[break_point + 1 :]
|
|
68
|
+
else:
|
|
69
|
+
# No good break point, just truncate
|
|
70
|
+
first_line = content[:content_width]
|
|
71
|
+
rest = content[content_width:]
|
|
72
|
+
|
|
73
|
+
lines.append(prefix + first_line)
|
|
74
|
+
|
|
75
|
+
# Second line with 10-space indent
|
|
76
|
+
if rest:
|
|
77
|
+
if len(rest) > terminal_width - 10:
|
|
78
|
+
rest = rest[: terminal_width - 10]
|
|
79
|
+
lines.append(' ' * 10 + rest)
|
|
80
|
+
else:
|
|
81
|
+
# Single line - truncate if needed
|
|
82
|
+
if len(content) > content_width:
|
|
83
|
+
content = content[:content_width]
|
|
84
|
+
lines.append(prefix + content)
|
|
85
|
+
|
|
86
|
+
return lines
|
|
87
|
+
except Exception as e:
|
|
88
|
+
logger.warning(f'Failed to format message line for logging: {e}')
|
|
89
|
+
# Return a simple fallback line
|
|
90
|
+
return ['❓[ ?]: [Error formatting message]']
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ========== End of Logging Helper Functions ==========
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class MessageManager:
|
|
97
|
+
vision_detail_level: Literal['auto', 'low', 'high']
|
|
98
|
+
|
|
99
|
+
def __init__(
|
|
100
|
+
self,
|
|
101
|
+
task: str,
|
|
102
|
+
system_message: SystemMessage,
|
|
103
|
+
file_system: FileSystem,
|
|
104
|
+
state: MessageManagerState = MessageManagerState(),
|
|
105
|
+
use_thinking: bool = True,
|
|
106
|
+
include_attributes: list[str] | None = None,
|
|
107
|
+
sensitive_data: dict[str, str | dict[str, str]] | None = None,
|
|
108
|
+
max_history_items: int | None = None,
|
|
109
|
+
vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
|
|
110
|
+
include_tool_call_examples: bool = False,
|
|
111
|
+
include_recent_events: bool = False,
|
|
112
|
+
sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
|
|
113
|
+
):
|
|
114
|
+
self.task = task
|
|
115
|
+
self.state = state
|
|
116
|
+
self.system_prompt = system_message
|
|
117
|
+
self.file_system = file_system
|
|
118
|
+
self.sensitive_data_description = ''
|
|
119
|
+
self.use_thinking = use_thinking
|
|
120
|
+
self.max_history_items = max_history_items
|
|
121
|
+
self.vision_detail_level = vision_detail_level
|
|
122
|
+
self.include_tool_call_examples = include_tool_call_examples
|
|
123
|
+
self.include_recent_events = include_recent_events
|
|
124
|
+
self.sample_images = sample_images
|
|
125
|
+
|
|
126
|
+
assert max_history_items is None or max_history_items > 5, 'max_history_items must be None or greater than 5'
|
|
127
|
+
|
|
128
|
+
# Store settings as direct attributes instead of in a settings object
|
|
129
|
+
self.include_attributes = include_attributes or []
|
|
130
|
+
self.sensitive_data = sensitive_data
|
|
131
|
+
self.last_input_messages = []
|
|
132
|
+
self.last_state_message_text: str | None = None
|
|
133
|
+
# Only initialize messages if state is empty
|
|
134
|
+
if len(self.state.history.get_messages()) == 0:
|
|
135
|
+
self._set_message_with_type(self.system_prompt, 'system')
|
|
136
|
+
|
|
137
|
+
@property
|
|
138
|
+
def agent_history_description(self) -> str:
|
|
139
|
+
"""Build agent history description from list of items, respecting max_history_items limit"""
|
|
140
|
+
if self.max_history_items is None:
|
|
141
|
+
# Include all items
|
|
142
|
+
return '\n'.join(item.to_string() for item in self.state.agent_history_items)
|
|
143
|
+
|
|
144
|
+
total_items = len(self.state.agent_history_items)
|
|
145
|
+
|
|
146
|
+
# If we have fewer items than the limit, just return all items
|
|
147
|
+
if total_items <= self.max_history_items:
|
|
148
|
+
return '\n'.join(item.to_string() for item in self.state.agent_history_items)
|
|
149
|
+
|
|
150
|
+
# We have more items than the limit, so we need to omit some
|
|
151
|
+
omitted_count = total_items - self.max_history_items
|
|
152
|
+
|
|
153
|
+
# Show first item + omitted message + most recent (max_history_items - 1) items
|
|
154
|
+
# The omitted message doesn't count against the limit, only real history items do
|
|
155
|
+
recent_items_count = self.max_history_items - 1 # -1 for first item
|
|
156
|
+
|
|
157
|
+
items_to_include = [
|
|
158
|
+
self.state.agent_history_items[0].to_string(), # Keep first item (initialization)
|
|
159
|
+
f'<sys>[... {omitted_count} previous steps omitted...]</sys>',
|
|
160
|
+
]
|
|
161
|
+
# Add most recent items
|
|
162
|
+
items_to_include.extend([item.to_string() for item in self.state.agent_history_items[-recent_items_count:]])
|
|
163
|
+
|
|
164
|
+
return '\n'.join(items_to_include)
|
|
165
|
+
|
|
166
|
+
def add_new_task(self, new_task: str) -> None:
|
|
167
|
+
new_task = '<follow_up_user_request> ' + new_task.strip() + ' </follow_up_user_request>'
|
|
168
|
+
if '<initial_user_request>' not in self.task:
|
|
169
|
+
self.task = '<initial_user_request>' + self.task + '</initial_user_request>'
|
|
170
|
+
self.task += '\n' + new_task
|
|
171
|
+
task_update_item = HistoryItem(system_message=new_task)
|
|
172
|
+
self.state.agent_history_items.append(task_update_item)
|
|
173
|
+
|
|
174
|
+
def _update_agent_history_description(
|
|
175
|
+
self,
|
|
176
|
+
model_output: AgentOutput | None = None,
|
|
177
|
+
result: list[ActionResult] | None = None,
|
|
178
|
+
step_info: AgentStepInfo | None = None,
|
|
179
|
+
) -> None:
|
|
180
|
+
"""Update the agent history description"""
|
|
181
|
+
|
|
182
|
+
if result is None:
|
|
183
|
+
result = []
|
|
184
|
+
step_number = step_info.step_number if step_info else None
|
|
185
|
+
|
|
186
|
+
self.state.read_state_description = ''
|
|
187
|
+
self.state.read_state_images = [] # Clear images from previous step
|
|
188
|
+
|
|
189
|
+
action_results = ''
|
|
190
|
+
result_len = len(result)
|
|
191
|
+
read_state_idx = 0
|
|
192
|
+
|
|
193
|
+
for idx, action_result in enumerate(result):
|
|
194
|
+
if action_result.include_extracted_content_only_once and action_result.extracted_content:
|
|
195
|
+
self.state.read_state_description += (
|
|
196
|
+
f'<read_state_{read_state_idx}>\n{action_result.extracted_content}\n</read_state_{read_state_idx}>\n'
|
|
197
|
+
)
|
|
198
|
+
read_state_idx += 1
|
|
199
|
+
logger.debug(f'Added extracted_content to read_state_description: {action_result.extracted_content}')
|
|
200
|
+
|
|
201
|
+
# Store images for one-time inclusion in the next message
|
|
202
|
+
if action_result.images:
|
|
203
|
+
self.state.read_state_images.extend(action_result.images)
|
|
204
|
+
logger.debug(f'Added {len(action_result.images)} image(s) to read_state_images')
|
|
205
|
+
|
|
206
|
+
if action_result.long_term_memory:
|
|
207
|
+
action_results += f'{action_result.long_term_memory}\n'
|
|
208
|
+
logger.debug(f'Added long_term_memory to action_results: {action_result.long_term_memory}')
|
|
209
|
+
elif action_result.extracted_content and not action_result.include_extracted_content_only_once:
|
|
210
|
+
action_results += f'{action_result.extracted_content}\n'
|
|
211
|
+
logger.debug(f'Added extracted_content to action_results: {action_result.extracted_content}')
|
|
212
|
+
|
|
213
|
+
if action_result.error:
|
|
214
|
+
if len(action_result.error) > 200:
|
|
215
|
+
error_text = action_result.error[:100] + '......' + action_result.error[-100:]
|
|
216
|
+
else:
|
|
217
|
+
error_text = action_result.error
|
|
218
|
+
action_results += f'{error_text}\n'
|
|
219
|
+
logger.debug(f'Added error to action_results: {error_text}')
|
|
220
|
+
|
|
221
|
+
# Simple 60k character limit for read_state_description
|
|
222
|
+
MAX_CONTENT_SIZE = 60000
|
|
223
|
+
if len(self.state.read_state_description) > MAX_CONTENT_SIZE:
|
|
224
|
+
self.state.read_state_description = (
|
|
225
|
+
self.state.read_state_description[:MAX_CONTENT_SIZE] + '\n... [Content truncated at 60k characters]'
|
|
226
|
+
)
|
|
227
|
+
logger.debug(f'Truncated read_state_description to {MAX_CONTENT_SIZE} characters')
|
|
228
|
+
|
|
229
|
+
self.state.read_state_description = self.state.read_state_description.strip('\n')
|
|
230
|
+
|
|
231
|
+
if action_results:
|
|
232
|
+
action_results = f'Result\n{action_results}'
|
|
233
|
+
action_results = action_results.strip('\n') if action_results else None
|
|
234
|
+
|
|
235
|
+
# Simple 60k character limit for action_results
|
|
236
|
+
if action_results and len(action_results) > MAX_CONTENT_SIZE:
|
|
237
|
+
action_results = action_results[:MAX_CONTENT_SIZE] + '\n... [Content truncated at 60k characters]'
|
|
238
|
+
logger.debug(f'Truncated action_results to {MAX_CONTENT_SIZE} characters')
|
|
239
|
+
|
|
240
|
+
# Build the history item
|
|
241
|
+
if model_output is None:
|
|
242
|
+
# Add history item for initial actions (step 0) or errors (step > 0)
|
|
243
|
+
if step_number is not None:
|
|
244
|
+
if step_number == 0 and action_results:
|
|
245
|
+
# Step 0 with initial action results
|
|
246
|
+
history_item = HistoryItem(step_number=step_number, action_results=action_results)
|
|
247
|
+
self.state.agent_history_items.append(history_item)
|
|
248
|
+
elif step_number > 0:
|
|
249
|
+
# Error case for steps > 0
|
|
250
|
+
history_item = HistoryItem(step_number=step_number, error='Agent failed to output in the right format.')
|
|
251
|
+
self.state.agent_history_items.append(history_item)
|
|
252
|
+
else:
|
|
253
|
+
history_item = HistoryItem(
|
|
254
|
+
step_number=step_number,
|
|
255
|
+
evaluation_previous_goal=model_output.current_state.evaluation_previous_goal,
|
|
256
|
+
memory=model_output.current_state.memory,
|
|
257
|
+
next_goal=model_output.current_state.next_goal,
|
|
258
|
+
action_results=action_results,
|
|
259
|
+
)
|
|
260
|
+
self.state.agent_history_items.append(history_item)
|
|
261
|
+
|
|
262
|
+
def _get_sensitive_data_description(self, current_page_url) -> str:
|
|
263
|
+
sensitive_data = self.sensitive_data
|
|
264
|
+
if not sensitive_data:
|
|
265
|
+
return ''
|
|
266
|
+
|
|
267
|
+
# Collect placeholders for sensitive data
|
|
268
|
+
placeholders: set[str] = set()
|
|
269
|
+
|
|
270
|
+
for key, value in sensitive_data.items():
|
|
271
|
+
if isinstance(value, dict):
|
|
272
|
+
# New format: {domain: {key: value}}
|
|
273
|
+
if current_page_url and match_url_with_domain_pattern(current_page_url, key, True):
|
|
274
|
+
placeholders.update(value.keys())
|
|
275
|
+
else:
|
|
276
|
+
# Old format: {key: value}
|
|
277
|
+
placeholders.add(key)
|
|
278
|
+
|
|
279
|
+
if placeholders:
|
|
280
|
+
placeholder_list = sorted(list(placeholders))
|
|
281
|
+
info = f'Here are placeholders for sensitive data:\n{placeholder_list}\n'
|
|
282
|
+
info += 'To use them, write <secret>the placeholder name</secret>'
|
|
283
|
+
return info
|
|
284
|
+
|
|
285
|
+
return ''
|
|
286
|
+
|
|
287
|
+
@observe_debug(ignore_input=True, ignore_output=True, name='create_state_messages')
|
|
288
|
+
@time_execution_sync('--create_state_messages')
|
|
289
|
+
def create_state_messages(
    self,
    browser_state_summary: BrowserStateSummary,
    model_output: AgentOutput | None = None,
    result: list[ActionResult] | None = None,
    step_info: AgentStepInfo | None = None,
    use_vision: bool | Literal['auto'] = 'auto',
    page_filtered_actions: str | None = None,
    sensitive_data=None,
    available_file_paths: list[str] | None = None,  # Always pass current available_file_paths
) -> None:
    """Build the single consolidated state message that drives the next LLM call."""

    # Step-scoped context messages from the previous step must not accumulate.
    self.state.history.context_messages.clear()

    # Fold the latest step outcome into the running history description first,
    # so the state message assembled below already reflects it.
    self._update_agent_history_description(model_output, result, step_info)

    # Prefer the explicitly passed sensitive_data; fall back to the instance copy
    # and keep the instance attribute in sync when a value is available.
    effective_sensitive_data = sensitive_data if sensitive_data is not None else self.sensitive_data
    if effective_sensitive_data is not None:
        self.sensitive_data = effective_sensitive_data
        self.sensitive_data_description = self._get_sensitive_data_description(browser_state_summary.url)

    # Did any action result explicitly ask for the screenshot to be attached?
    screenshot_requested = False
    for action_result in result or []:
        if action_result.metadata and action_result.metadata.get('include_screenshot'):
            screenshot_requested = True
            logger.debug('Screenshot inclusion requested by action result')
            break

    # use_vision semantics:
    #   True   -> always attach the screenshot
    #   'auto' -> attach only when an action result requested it
    #   False  -> never attach
    if use_vision is True:
        attach_screenshot = True
    elif use_vision == 'auto':
        attach_screenshot = screenshot_requested
    else:
        attach_screenshot = False

    screenshots = []
    if attach_screenshot and browser_state_summary.screenshot:
        screenshots.append(browser_state_summary.screenshot)

    # Vision is enabled for the prompt only when a screenshot is actually attached.
    effective_use_vision = bool(screenshots)

    assert browser_state_summary
    state_message = AgentMessagePrompt(
        browser_state_summary=browser_state_summary,
        file_system=self.file_system,
        agent_history_description=self.agent_history_description,
        read_state_description=self.state.read_state_description,
        task=self.task,
        include_attributes=self.include_attributes,
        step_info=step_info,
        page_filtered_actions=page_filtered_actions,
        sensitive_data=self.sensitive_data_description,
        available_file_paths=available_file_paths,
        screenshots=screenshots,
        vision_detail_level=self.vision_detail_level,
        include_recent_events=self.include_recent_events,
        sample_images=self.sample_images,
        read_state_images=self.state.read_state_images,
    ).get_user_message(effective_use_vision)

    # Keep the raw text around for conversation saving / debugging.
    self.last_state_message_text = state_message.text

    # Install the message into the single cached 'state' slot.
    self._set_message_with_type(state_message, 'state')
|
|
371
|
+
|
|
372
|
+
def _log_history_lines(self) -> str:
|
|
373
|
+
"""Generate a formatted log string of message history for debugging / printing to terminal"""
|
|
374
|
+
# TODO: fix logging
|
|
375
|
+
|
|
376
|
+
# try:
|
|
377
|
+
# total_input_tokens = 0
|
|
378
|
+
# message_lines = []
|
|
379
|
+
# terminal_width = shutil.get_terminal_size((80, 20)).columns
|
|
380
|
+
|
|
381
|
+
# for i, m in enumerate(self.state.history.messages):
|
|
382
|
+
# try:
|
|
383
|
+
# total_input_tokens += m.metadata.tokens
|
|
384
|
+
# is_last_message = i == len(self.state.history.messages) - 1
|
|
385
|
+
|
|
386
|
+
# # Extract content for logging
|
|
387
|
+
# content = _log_extract_message_content(m.message, is_last_message, m.metadata)
|
|
388
|
+
|
|
389
|
+
# # Format the message line(s)
|
|
390
|
+
# lines = _log_format_message_line(m, content, is_last_message, terminal_width)
|
|
391
|
+
# message_lines.extend(lines)
|
|
392
|
+
# except Exception as e:
|
|
393
|
+
# logger.warning(f'Failed to format message {i} for logging: {e}')
|
|
394
|
+
# # Add a fallback line for this message
|
|
395
|
+
# message_lines.append('❓[ ?]: [Error formatting this message]')
|
|
396
|
+
|
|
397
|
+
# # Build final log message
|
|
398
|
+
# return (
|
|
399
|
+
# f'📜 LLM Message history ({len(self.state.history.messages)} messages, {total_input_tokens} tokens):\n'
|
|
400
|
+
# + '\n'.join(message_lines)
|
|
401
|
+
# )
|
|
402
|
+
# except Exception as e:
|
|
403
|
+
# logger.warning(f'Failed to generate history log: {e}')
|
|
404
|
+
# # Return a minimal fallback message
|
|
405
|
+
# return f'📜 LLM Message history (error generating log: {e})'
|
|
406
|
+
|
|
407
|
+
return ''
|
|
408
|
+
|
|
409
|
+
@time_execution_sync('--get_messages')
|
|
410
|
+
def get_messages(self) -> list[BaseMessage]:
    """Return the current message list and remember it as the last LLM input."""
    # Emit the (currently stubbed) history log for debugging.
    history_log = self._log_history_lines()
    logger.debug(history_log)

    messages = self.state.history.get_messages()
    self.last_input_messages = messages
    return messages
|
|
417
|
+
|
|
418
|
+
def _set_message_with_type(self, message: BaseMessage, message_type: Literal['system', 'state']) -> None:
|
|
419
|
+
"""Replace a specific state message slot with a new message"""
|
|
420
|
+
# Don't filter system and state messages - they should contain placeholder tags or normal conversation
|
|
421
|
+
if message_type == 'system':
|
|
422
|
+
self.state.history.system_message = message
|
|
423
|
+
elif message_type == 'state':
|
|
424
|
+
self.state.history.state_message = message
|
|
425
|
+
else:
|
|
426
|
+
raise ValueError(f'Invalid state message type: {message_type}')
|
|
427
|
+
|
|
428
|
+
def _add_context_message(self, message: BaseMessage) -> None:
|
|
429
|
+
"""Add a contextual message specific to this step (e.g., validation errors, retry instructions, timeout warnings)"""
|
|
430
|
+
# Don't filter context messages - they should contain normal conversation or error messages
|
|
431
|
+
self.state.history.context_messages.append(message)
|
|
432
|
+
|
|
433
|
+
@time_execution_sync('--filter_sensitive_data')
|
|
434
|
+
def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
|
|
435
|
+
"""Filter out sensitive data from the message"""
|
|
436
|
+
|
|
437
|
+
def replace_sensitive(value: str) -> str:
|
|
438
|
+
if not self.sensitive_data:
|
|
439
|
+
return value
|
|
440
|
+
|
|
441
|
+
# Collect all sensitive values, immediately converting old format to new format
|
|
442
|
+
sensitive_values: dict[str, str] = {}
|
|
443
|
+
|
|
444
|
+
# Process all sensitive data entries
|
|
445
|
+
for key_or_domain, content in self.sensitive_data.items():
|
|
446
|
+
if isinstance(content, dict):
|
|
447
|
+
# Already in new format: {domain: {key: value}}
|
|
448
|
+
for key, val in content.items():
|
|
449
|
+
if val: # Skip empty values
|
|
450
|
+
sensitive_values[key] = val
|
|
451
|
+
elif content: # Old format: {key: value} - convert to new format internally
|
|
452
|
+
# We treat this as if it was {'http*://*': {key_or_domain: content}}
|
|
453
|
+
sensitive_values[key_or_domain] = content
|
|
454
|
+
|
|
455
|
+
# If there are no valid sensitive data entries, just return the original value
|
|
456
|
+
if not sensitive_values:
|
|
457
|
+
logger.warning('No valid entries found in sensitive_data dictionary')
|
|
458
|
+
return value
|
|
459
|
+
|
|
460
|
+
# Replace all valid sensitive data values with their placeholder tags
|
|
461
|
+
for key, val in sensitive_values.items():
|
|
462
|
+
value = value.replace(val, f'<secret>{key}</secret>')
|
|
463
|
+
|
|
464
|
+
return value
|
|
465
|
+
|
|
466
|
+
if isinstance(message.content, str):
|
|
467
|
+
message.content = replace_sensitive(message.content)
|
|
468
|
+
elif isinstance(message.content, list):
|
|
469
|
+
for i, item in enumerate(message.content):
|
|
470
|
+
if isinstance(item, ContentPartTextParam):
|
|
471
|
+
item.text = replace_sensitive(item.text)
|
|
472
|
+
message.content[i] = item
|
|
473
|
+
return message
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import anyio
|
|
9
|
+
|
|
10
|
+
from browser_use.llm.messages import BaseMessage
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def save_conversation(
    input_messages: list[BaseMessage],
    response: Any,
    target: str | Path,
    encoding: str | None = None,
) -> None:
    """Save conversation history (input messages plus model response) to *target* asynchronously.

    Parent directories are created as needed; UTF-8 is used unless another
    encoding is given.
    """
    destination = anyio.Path(Path(target))
    # Make sure the directory chain exists before writing.
    await destination.parent.mkdir(parents=True, exist_ok=True)

    body = await _format_conversation(input_messages, response)
    await destination.write_text(body, encoding=encoding or 'utf-8')
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def _format_conversation(messages: list[BaseMessage], response: Any) -> str:
|
|
34
|
+
"""Format the conversation including messages and response."""
|
|
35
|
+
lines = []
|
|
36
|
+
|
|
37
|
+
# Format messages
|
|
38
|
+
for message in messages:
|
|
39
|
+
lines.append(f' {message.role} ')
|
|
40
|
+
|
|
41
|
+
lines.append(message.text)
|
|
42
|
+
lines.append('') # Empty line after each message
|
|
43
|
+
|
|
44
|
+
# Format response
|
|
45
|
+
lines.append(' RESPONSE')
|
|
46
|
+
lines.append(json.dumps(json.loads(response.model_dump_json(exclude_unset=True)), indent=2))
|
|
47
|
+
|
|
48
|
+
return '\n'.join(lines)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Note: _write_messages_to_file and _write_response_to_file have been merged into _format_conversation
|
|
52
|
+
# This is more efficient for async operations and reduces file I/O
|