optexity-browser-use 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from os import getenv
|
|
4
|
+
from typing import TYPE_CHECKING, Any, TypeVar, overload
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from browser_use.llm.aws.serializer import AWSBedrockMessageSerializer
|
|
9
|
+
from browser_use.llm.base import BaseChatModel
|
|
10
|
+
from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
|
|
11
|
+
from browser_use.llm.messages import BaseMessage
|
|
12
|
+
from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from boto3 import client as AwsClient # type: ignore
|
|
16
|
+
from boto3.session import Session # type: ignore
|
|
17
|
+
|
|
18
|
+
T = TypeVar('T', bound=BaseModel)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ChatAWSBedrock(BaseChatModel):
	"""
	AWS Bedrock chat model supporting multiple providers (Anthropic, Meta, etc.).

	This class provides access to various models via AWS Bedrock,
	supporting both text generation and structured output via tool calling.

	Authentication works through any one of:
	1. Environment variables:
	   - AWS_ACCESS_KEY_ID
	   - AWS_SECRET_ACCESS_KEY
	   - AWS_SESSION_TOKEN (only required when using temporary credentials)
	   - AWS_REGION
	2. An explicit boto3 Session object
	3. AWS SSO authentication (set ``aws_sso_auth=True``)
	"""

	# Model configuration
	model: str = 'anthropic.claude-3-5-sonnet-20240620-v1:0'
	max_tokens: int | None = 4096
	temperature: float | None = None
	top_p: float | None = None
	seed: int | None = None
	stop_sequences: list[str] | None = None

	# AWS credentials and configuration
	aws_access_key_id: str | None = None
	aws_secret_access_key: str | None = None
	aws_session_token: str | None = None
	aws_region: str | None = None
	aws_sso_auth: bool = False
	session: 'Session | None' = None

	# Extra keys merged verbatim into the Converse request body
	request_params: dict[str, Any] | None = None

	# Static
	@property
	def provider(self) -> str:
		return 'aws_bedrock'

	@property
	def name(self) -> str:
		return str(self.model)

	def _get_client(self) -> 'AwsClient':  # type: ignore
		"""Build a bedrock-runtime client from the configured credentials."""
		try:
			from boto3 import client as AwsClient  # type: ignore
		except ImportError:
			raise ImportError(
				'`boto3` not installed. Please install using `pip install browser-use[aws] or pip install browser-use[all]`'
			)

		# An explicit boto3 session takes precedence over everything else.
		if self.session:
			return self.session.client('bedrock-runtime')

		# Fall back from instance attributes to environment variables.
		access_key = self.aws_access_key_id or getenv('AWS_ACCESS_KEY_ID')
		secret_key = self.aws_secret_access_key or getenv('AWS_SECRET_ACCESS_KEY')
		session_token = self.aws_session_token or getenv('AWS_SESSION_TOKEN')
		region = self.aws_region or getenv('AWS_REGION') or getenv('AWS_DEFAULT_REGION')

		# SSO relies on the ambient credential chain; only the region is passed.
		if self.aws_sso_auth:
			return AwsClient(service_name='bedrock-runtime', region_name=region)

		if not access_key or not secret_key:
			raise ModelProviderError(
				message='AWS credentials not found. Please set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables (and AWS_SESSION_TOKEN if using temporary credentials) or provide a boto3 session.',
				model=self.name,
			)

		return AwsClient(
			service_name='bedrock-runtime',
			region_name=region,
			aws_access_key_id=access_key,
			aws_secret_access_key=secret_key,
			aws_session_token=session_token,
		)

	def _get_inference_config(self) -> dict[str, Any]:
		"""Collect the non-None sampling settings into a Converse inferenceConfig dict."""
		candidates = [
			('maxTokens', self.max_tokens),
			('temperature', self.temperature),
			('topP', self.top_p),
			('stopSequences', self.stop_sequences),
			('seed', self.seed),
		]
		return {key: value for key, value in candidates if value is not None}

	def _format_tools_for_request(self, output_format: type[BaseModel]) -> list[dict[str, Any]]:
		"""Expose a Pydantic model as a single Bedrock tool so the model can emit structured output."""
		schema = output_format.model_json_schema()

		# Flatten each property down to type + description (top-level only;
		# nested/defs information from the Pydantic schema is not forwarded).
		properties = {
			prop_name: {
				'type': prop_info.get('type', 'string'),
				'description': prop_info.get('description', ''),
			}
			for prop_name, prop_info in schema.get('properties', {}).items()
		}

		tool_spec = {
			'name': f'extract_{output_format.__name__.lower()}',
			'description': f'Extract information in the format of {output_format.__name__}',
			'inputSchema': {
				'json': {'type': 'object', 'properties': properties, 'required': schema.get('required', [])}
			},
		}
		return [{'toolSpec': tool_spec}]

	def _get_usage(self, response: dict[str, Any]) -> ChatInvokeUsage | None:
		"""Map Bedrock's usage block onto ChatInvokeUsage; None when the response has no usage."""
		if 'usage' not in response:
			return None

		usage_data = response['usage']
		return ChatInvokeUsage(
			prompt_tokens=usage_data.get('inputTokens', 0),
			completion_tokens=usage_data.get('outputTokens', 0),
			total_tokens=usage_data.get('totalTokens', 0),
			prompt_cached_tokens=None,  # Bedrock doesn't provide this
			prompt_cache_creation_tokens=None,
			prompt_image_tokens=None,
		)

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

	async def ainvoke(
		self, messages: list[BaseMessage], output_format: type[T] | None = None
	) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
		"""
		Invoke the AWS Bedrock model with the given messages.

		Args:
			messages: List of chat messages
			output_format: Optional Pydantic model class for structured output

		Returns:
			Either a string response or an instance of output_format

		Raises:
			ModelRateLimitError: on Bedrock throttling errors.
			ModelProviderError: on any other provider failure or validation problem.
		"""
		try:
			from botocore.exceptions import ClientError  # type: ignore
		except ImportError:
			raise ImportError(
				'`boto3` not installed. Please install using `pip install browser-use[aws] or pip install browser-use[all]`'
			)

		bedrock_messages, system_message = AWSBedrockMessageSerializer.serialize_messages(messages)

		try:
			# Assemble the Converse request body.
			body: dict[str, Any] = {}

			if system_message:
				body['system'] = system_message

			inference_config = self._get_inference_config()
			if inference_config:
				body['inferenceConfig'] = inference_config

			# Structured output is implemented via tool calling.
			if output_format is not None:
				body['toolConfig'] = {'tools': self._format_tools_for_request(output_format)}

			# Caller-supplied extras win over the computed defaults.
			if self.request_params:
				body.update(self.request_params)

			# Drop any keys whose value is None before sending.
			body = {key: value for key, value in body.items() if value is not None}

			# Make the API call.
			client = self._get_client()
			response = client.converse(modelId=self.model, messages=bedrock_messages, **body)

			usage = self._get_usage(response)

			# Extract the response content.
			if 'output' in response and 'message' in response['output']:
				content = response['output']['message'].get('content', [])

				if output_format is None:
					# Plain-text mode: concatenate every text block.
					texts = [block['text'] for block in content if 'text' in block]
					return ChatInvokeCompletion(
						completion='\n'.join(texts) if texts else '',
						usage=usage,
					)

				# Structured mode: validate the first tool-use block found.
				for block in content:
					if 'toolUse' not in block:
						continue
					tool_input = block['toolUse'].get('input', {})

					try:
						return ChatInvokeCompletion(
							completion=output_format.model_validate(tool_input),
							usage=usage,
						)
					except Exception as e:
						# The input may arrive as a JSON string; try decoding before giving up.
						if isinstance(tool_input, str):
							try:
								decoded = json.loads(tool_input)
								return ChatInvokeCompletion(
									completion=output_format.model_validate(decoded),
									usage=usage,
								)
							except json.JSONDecodeError:
								pass
						raise ModelProviderError(
							message=f'Failed to validate structured output: {str(e)}',
							model=self.name,
						) from e

				# The model produced content but never called the tool we requested.
				raise ModelProviderError(
					message='Expected structured output but no tool use found in response',
					model=self.name,
				)

			# No message in the response at all.
			if output_format is None:
				return ChatInvokeCompletion(
					completion='',
					usage=usage,
				)
			raise ModelProviderError(
				message='No valid content found in response',
				model=self.name,
			)

		except ClientError as e:
			error_code = e.response.get('Error', {}).get('Code', 'Unknown')
			error_message = e.response.get('Error', {}).get('Message', str(e))

			# Throttling maps to the dedicated rate-limit error so callers can back off.
			if error_code in ['ThrottlingException', 'TooManyRequestsException']:
				raise ModelRateLimitError(message=error_message, model=self.name) from e
			raise ModelProviderError(message=error_message, model=self.name) from e
		except Exception as e:
			raise ModelProviderError(message=str(e), model=self.name) from e
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any, overload
|
|
5
|
+
|
|
6
|
+
from browser_use.llm.messages import (
|
|
7
|
+
AssistantMessage,
|
|
8
|
+
BaseMessage,
|
|
9
|
+
ContentPartImageParam,
|
|
10
|
+
ContentPartRefusalParam,
|
|
11
|
+
ContentPartTextParam,
|
|
12
|
+
SystemMessage,
|
|
13
|
+
ToolCall,
|
|
14
|
+
UserMessage,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AWSBedrockMessageSerializer:
	"""Serializer for converting between custom message types and AWS Bedrock message format."""

	@staticmethod
	def _is_base64_image(url: str) -> bool:
		"""True for data-URL images (``data:image/...``)."""
		return url.startswith('data:image/')

	@staticmethod
	def _is_url_image(url: str) -> bool:
		"""True for HTTP(S) URLs that end in a recognized image extension."""
		if not url.startswith(('http://', 'https://')):
			return False
		lowered = url.lower()
		return any(lowered.endswith(ext) for ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'])

	@staticmethod
	def _parse_base64_url(url: str) -> tuple[str, bytes]:
		"""Split a ``data:image/<fmt>;base64,<data>`` URL into (format, raw bytes)."""
		if not url.startswith('data:'):
			raise ValueError(f'Invalid base64 URL: {url}')

		header, data = url.split(',', 1)

		# Derive the image format from the mime type, defaulting to jpeg.
		mime_match = re.search(r'image/(\w+)', header)
		if mime_match:
			format_name = mime_match.group(1).lower()
			format_mapping = {'jpg': 'jpeg', 'jpeg': 'jpeg', 'png': 'png', 'gif': 'gif', 'webp': 'webp'}
			image_format = format_mapping.get(format_name, 'jpeg')
		else:
			image_format = 'jpeg'  # Default format

		try:
			image_bytes = base64.b64decode(data)
		except Exception as e:
			raise ValueError(f'Failed to decode base64 image data: {e}')

		return image_format, image_bytes

	@staticmethod
	def _download_and_convert_image(url: str) -> tuple[str, bytes]:
		"""Fetch an image over HTTP and return its (format, raw bytes)."""
		try:
			import httpx
		except ImportError:
			raise ImportError('httpx not available. Please install it to use URL images with AWS Bedrock.')

		try:
			response = httpx.get(url, timeout=30)
			response.raise_for_status()

			# Prefer the content-type header, falling back to the URL suffix.
			content_type = response.headers.get('content-type', '').lower()
			lowered = url.lower()
			if 'jpeg' in content_type or lowered.endswith(('.jpg', '.jpeg')):
				image_format = 'jpeg'
			elif 'png' in content_type or lowered.endswith('.png'):
				image_format = 'png'
			elif 'gif' in content_type or lowered.endswith('.gif'):
				image_format = 'gif'
			elif 'webp' in content_type or lowered.endswith('.webp'):
				image_format = 'webp'
			else:
				image_format = 'jpeg'  # Default format

			return image_format, response.content

		except Exception as e:
			raise ValueError(f'Failed to download image from {url}: {e}')

	@staticmethod
	def _serialize_content_part_text(part: ContentPartTextParam) -> dict[str, Any]:
		"""Convert a text content part to AWS Bedrock format."""
		return {'text': part.text}

	@staticmethod
	def _serialize_content_part_image(part: ContentPartImageParam) -> dict[str, Any]:
		"""Convert an image content part into a Bedrock image block carrying raw bytes."""
		url = part.image_url.url

		if AWSBedrockMessageSerializer._is_base64_image(url):
			# Handle base64 encoded images.
			image_format, image_bytes = AWSBedrockMessageSerializer._parse_base64_url(url)
		elif AWSBedrockMessageSerializer._is_url_image(url):
			# Download and convert URL images.
			image_format, image_bytes = AWSBedrockMessageSerializer._download_and_convert_image(url)
		else:
			raise ValueError(f'Unsupported image URL format: {url}')

		return {
			'image': {
				'format': image_format,
				'source': {
					'bytes': image_bytes,
				},
			}
		}

	@staticmethod
	def _serialize_user_content(
		content: str | list[ContentPartTextParam | ContentPartImageParam],
	) -> list[dict[str, Any]]:
		"""Serialize user message content (text and images) into Bedrock blocks."""
		if isinstance(content, str):
			return [{'text': content}]

		blocks: list[dict[str, Any]] = []
		for part in content:
			if part.type == 'text':
				blocks.append(AWSBedrockMessageSerializer._serialize_content_part_text(part))
			elif part.type == 'image_url':
				blocks.append(AWSBedrockMessageSerializer._serialize_content_part_image(part))
		return blocks

	@staticmethod
	def _serialize_system_content(
		content: str | list[ContentPartTextParam],
	) -> list[dict[str, Any]]:
		"""Serialize system message content; only text parts are kept."""
		if isinstance(content, str):
			return [{'text': content}]
		return [
			AWSBedrockMessageSerializer._serialize_content_part_text(part)
			for part in content
			if part.type == 'text'
		]

	@staticmethod
	def _serialize_assistant_content(
		content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None,
	) -> list[dict[str, Any]]:
		"""Serialize assistant content; refusal parts are skipped (Bedrock has no equivalent)."""
		if content is None:
			return []
		if isinstance(content, str):
			return [{'text': content}]
		return [
			AWSBedrockMessageSerializer._serialize_content_part_text(part)
			for part in content
			if part.type == 'text'
		]

	@staticmethod
	def _serialize_tool_call(tool_call: ToolCall) -> dict[str, Any]:
		"""Convert a tool call into a Bedrock toolUse block, tolerating non-JSON arguments."""
		try:
			arguments = json.loads(tool_call.function.arguments)
		except json.JSONDecodeError:
			# If arguments aren't valid JSON, wrap them so the request stays well-formed.
			arguments = {'arguments': tool_call.function.arguments}

		return {
			'toolUse': {
				'toolUseId': tool_call.id,
				'name': tool_call.function.name,
				'input': arguments,
			}
		}

	# region - Serialize overloads
	@overload
	@staticmethod
	def serialize(message: UserMessage) -> dict[str, Any]: ...

	@overload
	@staticmethod
	def serialize(message: SystemMessage) -> SystemMessage: ...

	@overload
	@staticmethod
	def serialize(message: AssistantMessage) -> dict[str, Any]: ...

	@staticmethod
	def serialize(message: BaseMessage) -> dict[str, Any] | SystemMessage:
		"""Serialize one message to Bedrock format; system messages are returned unchanged."""
		if isinstance(message, UserMessage):
			return {
				'role': 'user',
				'content': AWSBedrockMessageSerializer._serialize_user_content(message.content),
			}

		if isinstance(message, SystemMessage):
			# System prompts live in a separate request field, so hand the message back as-is.
			return message

		if isinstance(message, AssistantMessage):
			blocks: list[dict[str, Any]] = []

			# Text blocks first, then any tool-use blocks.
			if message.content is not None:
				blocks.extend(AWSBedrockMessageSerializer._serialize_assistant_content(message.content))
			if message.tool_calls:
				blocks.extend(AWSBedrockMessageSerializer._serialize_tool_call(tc) for tc in message.tool_calls)

			# AWS Bedrock requires at least one content block.
			if not blocks:
				blocks = [{'text': ''}]

			return {
				'role': 'assistant',
				'content': blocks,
			}

		raise ValueError(f'Unknown message type: {type(message)}')

	@staticmethod
	def serialize_messages(messages: list[BaseMessage]) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
		"""
		Serialize a list of messages, extracting any system message.

		Returns:
			Tuple of (bedrock_messages, system_message) where system_message is extracted
			from any SystemMessage in the list.
		"""
		bedrock_messages: list[dict[str, Any]] = []
		system_message: list[dict[str, Any]] | None = None

		for message in messages:
			if isinstance(message, SystemMessage):
				# A later system message replaces any earlier one.
				system_message = AWSBedrockMessageSerializer._serialize_system_content(message.content)
			else:
				bedrock_messages.append(AWSBedrockMessageSerializer.serialize(message))

		return bedrock_messages, system_message
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
from openai import AsyncAzureOpenAI as AsyncAzureOpenAIClient
|
|
7
|
+
from openai.types.shared import ChatModel
|
|
8
|
+
|
|
9
|
+
from browser_use.llm.openai.like import ChatOpenAILike
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ChatAzureOpenAI(ChatOpenAILike):
	"""
	Chat model for Azure OpenAI, speaking the OpenAI API schema.

	Args:
		model (str): The name of the OpenAI model to use. Defaults to "not-provided".
		api_key (Optional[str]): The API key to use. Defaults to "not-provided".
	"""

	# Model configuration
	model: str | ChatModel

	# Client initialization parameters
	api_key: str | None = None
	api_version: str | None = '2024-12-01-preview'
	azure_endpoint: str | None = None
	azure_deployment: str | None = None
	base_url: str | None = None
	azure_ad_token: str | None = None
	azure_ad_token_provider: Any | None = None

	default_headers: dict[str, str] | None = None
	default_query: dict[str, Any] | None = None

	# Cached client instance, created lazily by get_client().
	client: AsyncAzureOpenAIClient | None = None

	@property
	def provider(self) -> str:
		return 'azure'

	def _get_client_params(self) -> dict[str, Any]:
		"""Resolve credentials/endpoint (env vars fill gaps) and build client kwargs."""
		client_params: dict[str, Any] = {}

		# Environment variables back-fill anything not passed explicitly.
		self.api_key = self.api_key or os.getenv('AZURE_OPENAI_KEY') or os.getenv('AZURE_OPENAI_API_KEY')
		self.azure_endpoint = self.azure_endpoint or os.getenv('AZURE_OPENAI_ENDPOINT')
		self.azure_deployment = self.azure_deployment or os.getenv('AZURE_OPENAI_DEPLOYMENT')

		# NOTE(review): self.organization and self.http_client are not declared here —
		# presumably inherited from ChatOpenAILike; confirm against that base class.
		candidate_params = {
			'api_key': self.api_key,
			'api_version': self.api_version,
			'organization': self.organization,
			'azure_endpoint': self.azure_endpoint,
			'azure_deployment': self.azure_deployment,
			'base_url': self.base_url,
			'azure_ad_token': self.azure_ad_token,
			'azure_ad_token_provider': self.azure_ad_token_provider,
			'http_client': self.http_client,
		}
		if self.default_headers is not None:
			client_params['default_headers'] = self.default_headers
		if self.default_query is not None:
			client_params['default_query'] = self.default_query

		# Only forward parameters that were actually set.
		client_params.update({key: value for key, value in candidate_params.items() if value is not None})

		return client_params

	def get_client(self) -> AsyncAzureOpenAIClient:
		"""
		Return the asynchronous Azure OpenAI client, creating and caching it on first use.

		Returns:
			AsyncAzureOpenAIClient: An instance of the asynchronous OpenAI client.
		"""
		if self.client:
			return self.client

		client_params: dict[str, Any] = self._get_client_params()

		if self.http_client:
			client_params['http_client'] = self.http_client
		else:
			# Create a new async HTTP client with bounded connection-pool limits.
			client_params['http_client'] = httpx.AsyncClient(
				limits=httpx.Limits(max_connections=20, max_keepalive_connections=6)
			)

		self.client = AsyncAzureOpenAIClient(**client_params)

		return self.client
browser_use/llm/base.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
We have switched all of our code from langchain to openai.types.chat.chat_completion_message_param.
|
|
3
|
+
|
|
4
|
+
For easier transition we have
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Any, Protocol, TypeVar, overload, runtime_checkable
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
from browser_use.llm.messages import BaseMessage
|
|
12
|
+
from browser_use.llm.views import ChatInvokeCompletion
|
|
13
|
+
|
|
14
|
+
T = TypeVar('T', bound=BaseModel)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@runtime_checkable
class BaseChatModel(Protocol):
	"""Structural interface every chat model implementation must satisfy."""

	# Flag implementations flip once provider API keys have been verified.
	_verified_api_keys: bool = False

	model: str

	@property
	def provider(self) -> str: ...

	@property
	def name(self) -> str: ...

	@property
	def model_name(self) -> str:
		# for legacy support
		return self.model

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

	async def ainvoke(
		self, messages: list[BaseMessage], output_format: type[T] | None = None
	) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]: ...

	@classmethod
	def __get_pydantic_core_schema__(
		cls,
		source_type: type,
		handler: Any,
	) -> Any:
		"""
		Allow this Protocol to be used in Pydantic models -> very useful to typesafe the agent settings for example.
		Returns a schema that allows any object (since this is a Protocol).
		"""
		from pydantic_core import core_schema

		# Accept any object: Protocol membership is structural, not nominal.
		return core_schema.any_schema()