optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, Any
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field
6
+
7
+ from browser_use.llm.messages import (
8
+ BaseMessage,
9
+ )
10
+
11
+ if TYPE_CHECKING:
12
+ pass
13
+
14
+
15
class HistoryItem(BaseModel):
    """A single entry of the agent history that can render itself as text.

    An entry is one of three things: an error, a system message, or a regular
    step made of evaluation / memory / next goal / action results.
    ``error`` and ``system_message`` are mutually exclusive.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    step_number: int | None = None
    evaluation_previous_goal: str | None = None
    memory: str | None = None
    next_goal: str | None = None
    action_results: str | None = None
    error: str | None = None
    system_message: str | None = None

    def model_post_init(self, __context: Any) -> None:
        """Reject items that carry both an error and a system message.

        Raises:
            ValueError: if ``error`` and ``system_message`` are both set.
        """
        has_error = self.error is not None
        has_system_message = self.system_message is not None
        if has_error and has_system_message:
            raise ValueError('Cannot have both error and system_message at the same time')

    def to_string(self) -> str:
        """Render the item as the text block used in the agent history.

        Returns:
            The bare system message when one is set (and no error is set);
            otherwise a ``<step>`` / ``<step_unknown>`` header line followed by
            either the error text or the newline-joined non-empty step fields.
        """
        tag = 'step_unknown' if self.step_number is None else 'step'

        if self.error:
            body = self.error
        elif self.system_message:
            # System messages are emitted verbatim, without a step header.
            return self.system_message
        else:
            # Evaluation, memory and next goal are each skipped when None/empty.
            narrative = [
                f'{text}'
                for text in (self.evaluation_previous_goal, self.memory, self.next_goal)
                if text
            ]
            if self.action_results:
                narrative.append(self.action_results)
            body = '\n'.join(narrative)

        return f"""<{tag}>
{body}"""
64
+
65
+
66
class MessageHistory(BaseModel):
    """Container for the system message, the state message and context messages."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    system_message: BaseMessage | None = None
    state_message: BaseMessage | None = None
    context_messages: list[BaseMessage] = Field(default_factory=list)

    def get_messages(self) -> list[BaseMessage]:
        """Return all messages ordered system -> state -> contextual.

        The system and state messages are left out when they are unset
        (falsy); the context messages always follow, in their stored order.
        """
        leading = [
            message
            for message in (self.system_message, self.state_message)
            if message
        ]
        return [*leading, *self.context_messages]
84
+
85
+
86
class MessageManagerState(BaseModel):
    """State container used by MessageManager."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Message history; a fresh, empty MessageHistory per instance.
    history: MessageHistory = Field(default_factory=MessageHistory)
    tool_id: int = 1
    # Every new state starts with a single "Agent initialized" system entry at step 0.
    agent_history_items: list[HistoryItem] = Field(
        default_factory=lambda: [
            HistoryItem(step_number=0, system_message='Agent initialized'),
        ]
    )
    read_state_description: str = ''
    # Images to include in the next state message (cleared after each step)
    read_state_images: list[dict[str, Any]] = Field(default_factory=list)
@@ -0,0 +1,413 @@
1
+ import importlib.resources
2
+ from datetime import datetime
3
+ from typing import TYPE_CHECKING, Literal, Optional
4
+
5
+ from browser_use.dom.views import NodeType, SimplifiedNode
6
+ from browser_use.llm.messages import ContentPartImageParam, ContentPartTextParam, ImageURL, SystemMessage, UserMessage
7
+ from browser_use.observability import observe_debug
8
+ from browser_use.utils import is_new_tab_page
9
+
10
+ if TYPE_CHECKING:
11
+ from browser_use.agent.views import AgentStepInfo
12
+ from browser_use.browser.views import BrowserStateSummary
13
+ from browser_use.filesystem.file_system import FileSystem
14
+
15
+
16
class SystemPrompt:
    """Builds the agent's system message from a bundled template or an override."""

    def __init__(
        self,
        max_actions_per_step: int = 10,
        override_system_message: str | None = None,
        extend_system_message: str | None = None,
        use_thinking: bool = True,
        flash_mode: bool = False,
    ):
        """Assemble the system prompt and wrap it in a cached SystemMessage.

        Args:
            max_actions_per_step: Substituted for ``{max_actions}`` in the template.
            override_system_message: When not None, used verbatim instead of
                loading and formatting a template.
            extend_system_message: When truthy, appended to the prompt on a new line.
            use_thinking: Selects the thinking / no-thinking template.
            flash_mode: Selects the flash template; takes precedence over
                ``use_thinking``.

        Raises:
            RuntimeError: if the template file cannot be loaded.
        """
        self.max_actions_per_step = max_actions_per_step
        self.use_thinking = use_thinking
        self.flash_mode = flash_mode

        if override_system_message is not None:
            prompt = override_system_message
        else:
            self._load_prompt_template()
            prompt = self.prompt_template.format(max_actions=self.max_actions_per_step)

        if extend_system_message:
            prompt += f'\n{extend_system_message}'

        self.system_message = SystemMessage(content=prompt, cache=True)

    def _load_prompt_template(self) -> None:
        """Load the prompt template from the markdown file into ``self.prompt_template``.

        Raises:
            RuntimeError: if the template cannot be read; the underlying
                exception is chained as ``__cause__``.
        """
        # Choose the appropriate template based on flash_mode and use_thinking settings
        if self.flash_mode:
            template_filename = 'system_prompt_flash.md'
        elif self.use_thinking:
            template_filename = 'system_prompt.md'
        else:
            template_filename = 'system_prompt_no_thinking.md'

        try:
            # This works both in development and when installed as a package
            template_file = importlib.resources.files('browser_use.agent').joinpath(template_filename)
            with template_file.open('r', encoding='utf-8') as f:
                self.prompt_template = f.read()
        except Exception as e:
            # Chain the original error so the real cause stays in the traceback.
            raise RuntimeError(f'Failed to load system prompt template: {e}') from e

    def get_system_message(self) -> SystemMessage:
        """
        Get the system prompt for the agent.

        Returns:
            SystemMessage: Formatted system prompt
        """
        return self.system_message
65
+
66
+
67
class AgentMessagePrompt:
    """Builds the per-step user message from history, agent state and browser state."""

    vision_detail_level: Literal['auto', 'low', 'high']

    def __init__(
        self,
        browser_state_summary: 'BrowserStateSummary',
        file_system: 'FileSystem',
        agent_history_description: str | None = None,
        read_state_description: str | None = None,
        task: str | None = None,
        include_attributes: list[str] | None = None,
        step_info: Optional['AgentStepInfo'] = None,
        page_filtered_actions: str | None = None,
        max_clickable_elements_length: int = 40000,
        sensitive_data: str | None = None,
        available_file_paths: list[str] | None = None,
        screenshots: list[str] | None = None,
        vision_detail_level: Literal['auto', 'low', 'high'] = 'auto',
        include_recent_events: bool = False,
        sample_images: list[ContentPartTextParam | ContentPartImageParam] | None = None,
        read_state_images: list[dict] | None = None,
    ):
        """Store the inputs used to render the state message.

        ``screenshots``, ``sample_images`` and ``read_state_images`` default to
        empty lists when None is passed. ``browser_state_summary`` must be truthy.
        """
        self.browser_state: 'BrowserStateSummary' = browser_state_summary
        self.file_system: 'FileSystem | None' = file_system
        self.agent_history_description: str | None = agent_history_description
        self.read_state_description: str | None = read_state_description
        self.task: str | None = task
        self.include_attributes = include_attributes
        self.step_info = step_info
        self.page_filtered_actions: str | None = page_filtered_actions
        self.max_clickable_elements_length: int = max_clickable_elements_length
        self.sensitive_data: str | None = sensitive_data
        self.available_file_paths: list[str] | None = available_file_paths
        self.screenshots = screenshots or []
        self.vision_detail_level = vision_detail_level
        self.include_recent_events = include_recent_events
        self.sample_images = sample_images or []
        self.read_state_images = read_state_images or []
        assert self.browser_state

    def _extract_page_statistics(self) -> dict[str, int]:
        """Extract high-level page statistics from DOM tree for LLM context.

        Returns:
            Counters for links, iframes, open/closed shadow hosts, scroll
            containers, images, interactive elements and total visited nodes.
            All counters are zero when there is no DOM state or no root node.
        """
        stats = {
            'links': 0,
            'iframes': 0,
            'shadow_open': 0,
            'shadow_closed': 0,
            'scroll_containers': 0,
            'images': 0,
            'interactive_elements': 0,
            'total_elements': 0,
        }

        if not self.browser_state.dom_state or not self.browser_state.dom_state._root:
            return stats

        # Walk the simplified tree with an explicit stack rather than recursion,
        # so very deep trees cannot hit the interpreter's recursion limit.
        # Visit order does not affect the counters.
        pending: list[SimplifiedNode] = [self.browser_state.dom_state._root]
        while pending:
            node = pending.pop()
            if not node or not node.original_node:
                # The node and its subtree are skipped entirely.
                continue

            original = node.original_node
            stats['total_elements'] += 1

            # Count by node type and tag
            if original.node_type == NodeType.ELEMENT_NODE:
                tag = original.tag_name.lower() if original.tag_name else ''

                if tag == 'a':
                    stats['links'] += 1
                elif tag in ('iframe', 'frame'):
                    stats['iframes'] += 1
                elif tag == 'img':
                    stats['images'] += 1

                # Check if scrollable
                if original.is_actually_scrollable:
                    stats['scroll_containers'] += 1

                # Check if interactive
                if node.is_interactive:
                    stats['interactive_elements'] += 1

                # Check if this element hosts shadow DOM
                if node.is_shadow_host:
                    # A host counts as closed if any shadow child is a closed fragment.
                    has_closed_shadow = any(
                        child.original_node.node_type == NodeType.DOCUMENT_FRAGMENT_NODE
                        and child.original_node.shadow_root_type
                        and child.original_node.shadow_root_type.lower() == 'closed'
                        for child in node.children
                    )
                    if has_closed_shadow:
                        stats['shadow_closed'] += 1
                    else:
                        stats['shadow_open'] += 1

            # Shadow-root fragments (DOCUMENT_FRAGMENT_NODE) are deliberately not
            # counted here: they are already accounted for at the host level above.

            pending.extend(node.children)

        return stats

    @observe_debug(ignore_input=True, ignore_output=True, name='_get_browser_state_description')
    def _get_browser_state_description(self) -> str:
        """Render page stats, tabs, page info, events and interactive elements as text."""
        # Extract page statistics first
        page_stats = self._extract_page_statistics()

        # Format statistics for LLM
        stats_text = '<page_stats>'
        if page_stats['total_elements'] < 10:
            stats_text += 'Page appears empty (SPA not loaded?) - '
        stats_text += f'{page_stats["links"]} links, {page_stats["interactive_elements"]} interactive, '
        stats_text += f'{page_stats["iframes"]} iframes, {page_stats["scroll_containers"]} scroll containers'
        if page_stats['shadow_open'] > 0 or page_stats['shadow_closed'] > 0:
            stats_text += f', {page_stats["shadow_open"]} shadow(open), {page_stats["shadow_closed"]} shadow(closed)'
        if page_stats['images'] > 0:
            stats_text += f', {page_stats["images"]} images'
        stats_text += f', {page_stats["total_elements"]} total elements'
        stats_text += '</page_stats>\n'

        elements_text = self.browser_state.dom_state.llm_representation(include_attributes=self.include_attributes)

        if len(elements_text) > self.max_clickable_elements_length:
            elements_text = elements_text[: self.max_clickable_elements_length]
            truncated_text = f' (truncated to {self.max_clickable_elements_length} characters)'
        else:
            truncated_text = ''

        # Scroll position expressed in viewport heights ("pages"); all values
        # stay at zero when page info is missing or the viewport height is not positive.
        pages_above = 0
        pages_below = 0
        page_info_text = ''
        pi = self.browser_state.page_info
        if pi:
            total_pages = 0
            if pi.viewport_height > 0:
                pages_above = pi.pixels_above / pi.viewport_height
                pages_below = pi.pixels_below / pi.viewport_height
                total_pages = pi.page_height / pi.viewport_height
            page_info_text = '<page_info>'
            page_info_text += f'{pages_above:.1f} pages above, '
            page_info_text += f'{pages_below:.1f} pages below, '
            page_info_text += f'{total_pages:.1f} total pages'
            page_info_text += '</page_info>\n'
        has_content_above = pages_above > 0
        has_content_below = pages_below > 0

        if elements_text != '':
            # Mark whether the element list starts/ends at the page boundary
            # or whether more content exists beyond the viewport.
            if has_content_above:
                elements_text = f'... {pages_above:.1f} pages above ...\n{elements_text}'
            else:
                elements_text = f'[Start of page]\n{elements_text}'
            if has_content_below:
                elements_text = f'{elements_text}\n... {pages_below:.1f} pages below ...'
            else:
                elements_text = f'{elements_text}\n[End of page]'
        else:
            elements_text = 'empty page'

        tabs = self.browser_state.tabs

        # Find tabs that match both URL and title to identify current tab more reliably
        current_tab_candidates = [
            tab.target_id
            for tab in tabs
            if tab.url == self.browser_state.url and tab.title == self.browser_state.title
        ]

        # If we have exactly one match, mark it as current
        # Otherwise, don't mark any tab as current to avoid confusion
        current_target_id = current_tab_candidates[0] if len(current_tab_candidates) == 1 else None

        tabs_text = ''.join(f'Tab {tab.target_id[-4:]}: {tab.url} - {tab.title[:30]}\n' for tab in tabs)

        current_tab_text = f'Current tab: {current_target_id[-4:]}' if current_target_id is not None else ''

        # Check if current page is a PDF viewer and add appropriate message
        pdf_message = ''
        if self.browser_state.is_pdf_viewer:
            pdf_message = (
                'PDF viewer cannot be rendered. In this page, DO NOT use the extract action as PDF content cannot be rendered. '
                'Use the read_file action on the downloaded PDF in available_file_paths to read the full text content.\n\n'
            )

        # Add recent events if available and requested
        recent_events_text = ''
        if self.include_recent_events and self.browser_state.recent_events:
            recent_events_text = f'Recent browser events: {self.browser_state.recent_events}\n'

        # Add closed popup messages if any
        closed_popups_text = ''
        if self.browser_state.closed_popup_messages:
            popup_lines = ''.join(f' - {popup_msg}\n' for popup_msg in self.browser_state.closed_popup_messages)
            closed_popups_text = 'Auto-closed JavaScript dialogs:\n' + popup_lines + '\n'

        browser_state = f"""{stats_text}{current_tab_text}
Available tabs:
{tabs_text}
{page_info_text}
{recent_events_text}{closed_popups_text}{pdf_message}Interactive elements{truncated_text}:
{elements_text}
"""
        return browser_state

    def _get_agent_state_description(self) -> str:
        """Render the task, file system summary, todo contents and step info as text."""
        if self.step_info:
            step_info_description = f'Step{self.step_info.step_number + 1} maximum:{self.step_info.max_steps}\n'
        else:
            step_info_description = ''

        time_str = datetime.now().strftime('%Y-%m-%d')
        step_info_description += f'Today:{time_str}'

        _todo_contents = self.file_system.get_todo_contents() if self.file_system else ''
        if not _todo_contents:
            _todo_contents = '[empty todo.md, fill it when applicable]'

        agent_state = f"""
<user_request>
{self.task}
</user_request>
<file_system>
{self.file_system.describe() if self.file_system else 'No file system available'}
</file_system>
<todo_contents>
{_todo_contents}
</todo_contents>
"""
        if self.sensitive_data:
            agent_state += f'<sensitive_data>{self.sensitive_data}</sensitive_data>\n'

        agent_state += f'<step_info>{step_info_description}</step_info>\n'
        if self.available_file_paths:
            available_file_paths_text = '\n'.join(self.available_file_paths)
            agent_state += f'<available_file_paths>{available_file_paths_text}\nUse with absolute paths</available_file_paths>\n'
        return agent_state

    @observe_debug(ignore_input=True, ignore_output=True, name='get_user_message')
    def get_user_message(self, use_vision: bool = True) -> UserMessage:
        """Get complete state as a single cached message.

        Args:
            use_vision: Whether screenshots may be attached. Forced off when the
                page is a new tab page, the step number is 0 and only one tab is open.

        Returns:
            A text-only UserMessage, or a multi-part one (text, sample images,
            labelled screenshots, read-file images) when screenshots are
            attached or read-file images are present.
        """
        # Don't pass screenshot to model if page is a new tab page, step is 0, and there's only one tab
        if (
            is_new_tab_page(self.browser_state.url)
            and self.step_info is not None
            and self.step_info.step_number == 0
            and len(self.browser_state.tabs) == 1
        ):
            use_vision = False

        # Build complete state description
        state_description = (
            '<agent_history>\n'
            + (self.agent_history_description.strip('\n') if self.agent_history_description else '')
            + '\n</agent_history>\n\n'
        )
        state_description += '<agent_state>\n' + self._get_agent_state_description().strip('\n') + '\n</agent_state>\n'
        state_description += '<browser_state>\n' + self._get_browser_state_description().strip('\n') + '\n</browser_state>\n'
        # Only add read_state if it has content
        read_state_description = self.read_state_description.strip('\n').strip() if self.read_state_description else ''
        if read_state_description:
            state_description += '<read_state>\n' + read_state_description + '\n</read_state>\n'

        if self.page_filtered_actions:
            state_description += '<page_specific_actions>\n'
            state_description += self.page_filtered_actions + '\n'
            state_description += '</page_specific_actions>\n'

        # Check if we have images to include (from read_file action)
        has_images = bool(self.read_state_images)
        # Screenshots are attached only when vision is enabled; images from
        # read_file alone must not pull screenshots into the message.
        include_screenshots = use_vision is True and bool(self.screenshots)

        if not include_screenshots and not has_images:
            return UserMessage(content=state_description, cache=True)

        # Start with text description
        content_parts: list[ContentPartTextParam | ContentPartImageParam] = [ContentPartTextParam(text=state_description)]

        # Add sample images
        content_parts.extend(self.sample_images)

        if include_screenshots:
            # Add screenshots with labels; the last one is the current screenshot
            last_index = len(self.screenshots) - 1
            for i, screenshot in enumerate(self.screenshots):
                # Use simple, accurate labeling since we don't have actual step timing info
                label = 'Current screenshot:' if i == last_index else 'Previous screenshot:'

                # Add label as text content
                content_parts.append(ContentPartTextParam(text=label))

                # Add the screenshot
                content_parts.append(
                    ContentPartImageParam(
                        image_url=ImageURL(
                            url=f'data:image/jpeg;base64,{screenshot}',
                            media_type='image/jpeg',
                            detail=self.vision_detail_level,
                        ),
                    )
                )

        # Add read_state images (from read_file action) after any screenshots
        for img_data in self.read_state_images:
            img_name = img_data.get('name', 'unknown')
            img_base64 = img_data.get('data', '')

            if not img_base64:
                continue

            # Detect image format from name; anything that is not .png is sent as JPEG
            media_type = 'image/png' if img_name.lower().endswith('.png') else 'image/jpeg'

            # Add label
            content_parts.append(ContentPartTextParam(text=f'Image from file: {img_name}'))

            # Add the image
            content_parts.append(
                ContentPartImageParam(
                    image_url=ImageURL(
                        url=f'data:{media_type};base64,{img_base64}',
                        media_type=media_type,
                        detail=self.vision_detail_level,
                    ),
                )
            )

        return UserMessage(content=content_parts, cache=True)