optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,97 @@
1
+ from collections.abc import Mapping
2
+ from dataclasses import dataclass
3
+ from typing import Any, TypeVar, overload
4
+
5
+ import httpx
6
+ from ollama import AsyncClient as OllamaAsyncClient
7
+ from ollama import Options
8
+ from pydantic import BaseModel
9
+
10
+ from browser_use.llm.base import BaseChatModel
11
+ from browser_use.llm.exceptions import ModelProviderError
12
+ from browser_use.llm.messages import BaseMessage
13
+ from browser_use.llm.ollama.serializer import OllamaMessageSerializer
14
+ from browser_use.llm.views import ChatInvokeCompletion
15
+
16
+ T = TypeVar('T', bound=BaseModel)
17
+
18
+
19
@dataclass
class ChatOllama(BaseChatModel):
	"""
	A wrapper around Ollama's chat API implementing the BaseChatModel protocol.

	Supports plain-text completions and structured output; for structured output
	the Pydantic model's JSON schema is passed to Ollama via the ``format``
	parameter and the raw response is validated back into the model.
	"""

	# Name of the Ollama model to run (e.g. 'llama3.1').
	model: str

	# Client initialization parameters
	host: str | None = None  # Ollama server URL; None -> client library default
	timeout: float | httpx.Timeout | None = None
	client_params: dict[str, Any] | None = None  # extra kwargs forwarded to OllamaAsyncClient
	ollama_options: Mapping[str, Any] | Options | None = None  # per-request model options

	# Static
	@property
	def provider(self) -> str:
		return 'ollama'

	def _get_client_params(self) -> dict[str, Any]:
		"""Prepare client parameters dictionary.

		NOTE(review): currently unused by ``get_client()``, which builds the
		client from the individual attributes instead — kept for interface parity.
		"""
		return {
			'host': self.host,
			'timeout': self.timeout,
			'client_params': self.client_params,
		}

	def get_client(self) -> OllamaAsyncClient:
		"""Return a fresh OllamaAsyncClient built from the configured connection params."""
		return OllamaAsyncClient(host=self.host, timeout=self.timeout, **self.client_params or {})

	@property
	def name(self) -> str:
		return self.model

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

	async def ainvoke(
		self, messages: list[BaseMessage], output_format: type[T] | None = None
	) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
		"""
		Invoke the model with the given messages.

		Args:
			messages: List of chat messages.
			output_format: Optional Pydantic model class for structured output.

		Returns:
			ChatInvokeCompletion wrapping either the raw text or a validated
			instance of ``output_format``. Usage is always None (Ollama token
			accounting is not captured here).

		Raises:
			ModelProviderError: any failure from the client or schema validation.
		"""
		ollama_messages = OllamaMessageSerializer.serialize_messages(messages)

		try:
			if output_format is None:
				response = await self.get_client().chat(
					model=self.model,
					messages=ollama_messages,
					options=self.ollama_options,
				)
				return ChatInvokeCompletion(completion=response.message.content or '', usage=None)

			# Structured output: hand the Pydantic JSON schema to Ollama's `format`.
			schema = output_format.model_json_schema()
			response = await self.get_client().chat(
				model=self.model,
				messages=ollama_messages,
				format=schema,
				options=self.ollama_options,
			)
			# Fixed: dropped the redundant `if output_format is not None` re-check —
			# this branch is only reachable when output_format is set.
			completion = output_format.model_validate_json(response.message.content or '')
			return ChatInvokeCompletion(completion=completion, usage=None)

		except Exception as e:
			raise ModelProviderError(message=str(e), model=self.name) from e
@@ -0,0 +1,143 @@
1
+ import base64
2
+ import json
3
+ from typing import Any, overload
4
+
5
+ from ollama._types import Image, Message
6
+
7
+ from browser_use.llm.messages import (
8
+ AssistantMessage,
9
+ BaseMessage,
10
+ SystemMessage,
11
+ ToolCall,
12
+ UserMessage,
13
+ )
14
+
15
+
16
class OllamaMessageSerializer:
	"""Translates browser-use message objects into Ollama ``Message`` instances."""

	@staticmethod
	def _extract_text_content(content: Any) -> str:
		"""Collect the text pieces of *content*, skipping image parts."""
		if content is None:
			return ''
		if isinstance(content, str):
			return content

		pieces: list[str] = []
		for item in content:
			if not hasattr(item, 'type'):
				continue
			if item.type == 'text':
				pieces.append(item.text)
			elif item.type == 'refusal':
				pieces.append(f'[Refusal] {item.refusal}')
			# image parts are handled separately by _extract_images
		return '\n'.join(pieces)

	@staticmethod
	def _extract_images(content: Any) -> list[Image]:
		"""Pull image parts out of *content* as Ollama ``Image`` objects."""
		if content is None or isinstance(content, str):
			return []

		collected: list[Image] = []
		for item in content:
			if not (hasattr(item, 'type') and item.type == 'image_url'):
				continue
			url = item.image_url.url
			if url.startswith('data:'):
				# Data URL ("data:image/jpeg;base64,<payload>") -> decode to raw bytes.
				_, payload = url.split(',', 1)
				collected.append(Image(value=base64.b64decode(payload)))
			else:
				# Plain URL: Ollama fetches it itself.
				collected.append(Image(value=url))
		return collected

	@staticmethod
	def _serialize_tool_calls(tool_calls: list[ToolCall]) -> list[Message.ToolCall]:
		"""Map browser-use tool calls onto Ollama's tool-call structure."""
		converted: list[Message.ToolCall] = []
		for call in tool_calls:
			# Ollama expects a dict of arguments; ours are stored as a JSON string.
			try:
				args = json.loads(call.function.arguments)
			except json.JSONDecodeError:
				# Not valid JSON — pass the raw string through under a single key.
				args = {'arguments': call.function.arguments}
			converted.append(
				Message.ToolCall(function=Message.ToolCall.Function(name=call.function.name, arguments=args))
			)
		return converted

	# region - Serialize overloads
	@overload
	@staticmethod
	def serialize(message: UserMessage) -> Message: ...

	@overload
	@staticmethod
	def serialize(message: SystemMessage) -> Message: ...

	@overload
	@staticmethod
	def serialize(message: AssistantMessage) -> Message: ...

	@staticmethod
	def serialize(message: BaseMessage) -> Message:
		"""Serialize one browser-use message into an Ollama ``Message``."""

		if isinstance(message, UserMessage):
			text = OllamaMessageSerializer._extract_text_content(message.content)
			pictures = OllamaMessageSerializer._extract_images(message.content)
			result = Message(role='user', content=text if text else None)
			if pictures:
				result.images = pictures
			return result

		if isinstance(message, SystemMessage):
			text = OllamaMessageSerializer._extract_text_content(message.content)
			return Message(role='system', content=text if text else None)

		if isinstance(message, AssistantMessage):
			text = None
			if message.content is not None:
				text = OllamaMessageSerializer._extract_text_content(message.content)
			result = Message(role='assistant', content=text if text else None)
			if message.tool_calls:
				result.tool_calls = OllamaMessageSerializer._serialize_tool_calls(message.tool_calls)
			return result

		raise ValueError(f'Unknown message type: {type(message)}')

	@staticmethod
	def serialize_messages(messages: list[BaseMessage]) -> list[Message]:
		"""Serialize a list of browser_use messages to Ollama Messages."""
		return [OllamaMessageSerializer.serialize(m) for m in messages]
@@ -0,0 +1,264 @@
1
+ from collections.abc import Iterable, Mapping
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Literal, TypeVar, overload
4
+
5
+ import httpx
6
+ from openai import APIConnectionError, APIStatusError, AsyncOpenAI, RateLimitError
7
+ from openai.types.chat import ChatCompletionContentPartTextParam
8
+ from openai.types.chat.chat_completion import ChatCompletion
9
+ from openai.types.shared.chat_model import ChatModel
10
+ from openai.types.shared_params.reasoning_effort import ReasoningEffort
11
+ from openai.types.shared_params.response_format_json_schema import JSONSchema, ResponseFormatJSONSchema
12
+ from pydantic import BaseModel
13
+
14
+ from browser_use.llm.base import BaseChatModel
15
+ from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
16
+ from browser_use.llm.messages import BaseMessage
17
+ from browser_use.llm.openai.serializer import OpenAIMessageSerializer
18
+ from browser_use.llm.schema import SchemaOptimizer
19
+ from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
20
+
21
+ T = TypeVar('T', bound=BaseModel)
22
+
23
+
24
@dataclass
class ChatOpenAI(BaseChatModel):
	"""
	A wrapper around AsyncOpenAI that implements the BaseLLM protocol.

	This class accepts all AsyncOpenAI parameters while adding model
	and temperature parameters for the LLM interface (if temperature is not `None`).
	"""

	# Model configuration
	model: ChatModel | str

	# Model params
	temperature: float | None = 0.2
	frequency_penalty: float | None = 0.3  # this avoids infinite generation of \t for models like 4.1-mini
	reasoning_effort: ReasoningEffort = 'low'
	seed: int | None = None
	service_tier: Literal['auto', 'default', 'flex', 'priority', 'scale'] | None = None
	top_p: float | None = None
	add_schema_to_system_prompt: bool = False  # Add JSON schema to system prompt instead of using response_format
	dont_force_structured_output: bool = False  # If True, the model will not be forced to output a structured output

	# Client initialization parameters
	api_key: str | None = None
	organization: str | None = None
	project: str | None = None
	base_url: str | httpx.URL | None = None
	websocket_base_url: str | httpx.URL | None = None
	timeout: float | httpx.Timeout | None = None
	max_retries: int = 5  # Increase default retries for automation reliability
	default_headers: Mapping[str, str] | None = None
	default_query: Mapping[str, object] | None = None
	http_client: httpx.AsyncClient | None = None
	_strict_response_validation: bool = False
	max_completion_tokens: int | None = 4096
	# Models whose name (substring match) triggers reasoning-specific params.
	reasoning_models: list[ChatModel | str] | None = field(
		default_factory=lambda: [
			'o4-mini',
			'o3',
			'o3-mini',
			'o1',
			'o1-pro',
			'o3-pro',
			'gpt-5',
			'gpt-5-mini',
			'gpt-5-nano',
		]
	)

	# Static
	@property
	def provider(self) -> str:
		return 'openai'

	def _get_client_params(self) -> dict[str, Any]:
		"""Prepare client parameters dictionary (non-None values only)."""
		base_params = {
			'api_key': self.api_key,
			'organization': self.organization,
			'project': self.project,
			'base_url': self.base_url,
			'websocket_base_url': self.websocket_base_url,
			'timeout': self.timeout,
			'max_retries': self.max_retries,
			'default_headers': self.default_headers,
			'default_query': self.default_query,
			'_strict_response_validation': self._strict_response_validation,
		}

		# Create client_params dict with non-None values
		client_params = {k: v for k, v in base_params.items() if v is not None}

		# Add http_client if provided
		if self.http_client is not None:
			client_params['http_client'] = self.http_client

		return client_params

	def get_client(self) -> AsyncOpenAI:
		"""
		Returns an AsyncOpenAI client.

		Returns:
			AsyncOpenAI: An instance of the AsyncOpenAI client.
		"""
		client_params = self._get_client_params()
		return AsyncOpenAI(**client_params)

	@property
	def name(self) -> str:
		return str(self.model)

	def _get_usage(self, response: ChatCompletion) -> ChatInvokeUsage | None:
		"""Extract token usage from a ChatCompletion, or None if absent."""
		if response.usage is None:
			return None

		completion_tokens = response.usage.completion_tokens
		completion_token_details = response.usage.completion_tokens_details
		if completion_token_details is not None:
			reasoning_tokens = completion_token_details.reasoning_tokens
			if reasoning_tokens is not None:
				# NOTE(review): OpenAI documents completion_tokens as already
				# including reasoning tokens, so this may double-count — confirm
				# against the Usage object docs before changing downstream billing.
				completion_tokens += reasoning_tokens

		return ChatInvokeUsage(
			prompt_tokens=response.usage.prompt_tokens,
			prompt_cached_tokens=response.usage.prompt_tokens_details.cached_tokens
			if response.usage.prompt_tokens_details is not None
			else None,
			prompt_cache_creation_tokens=None,
			prompt_image_tokens=None,
			# Completion
			completion_tokens=completion_tokens,
			total_tokens=response.usage.total_tokens,
		)

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

	async def ainvoke(
		self, messages: list[BaseMessage], output_format: type[T] | None = None
	) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
		"""
		Invoke the model with the given messages.

		Args:
			messages: List of chat messages
			output_format: Optional Pydantic model class for structured output

		Returns:
			Either a string response or an instance of output_format

		Raises:
			ModelRateLimitError: on HTTP 429 from the API.
			ModelProviderError: on connection/status errors or unparsable output.
		"""

		openai_messages = OpenAIMessageSerializer.serialize_messages(messages)

		try:
			model_params: dict[str, Any] = {}

			if self.temperature is not None:
				model_params['temperature'] = self.temperature

			if self.frequency_penalty is not None:
				model_params['frequency_penalty'] = self.frequency_penalty

			if self.max_completion_tokens is not None:
				model_params['max_completion_tokens'] = self.max_completion_tokens

			if self.top_p is not None:
				model_params['top_p'] = self.top_p

			if self.seed is not None:
				model_params['seed'] = self.seed

			if self.service_tier is not None:
				model_params['service_tier'] = self.service_tier

			if self.reasoning_models and any(str(m).lower() in str(self.model).lower() for m in self.reasoning_models):
				model_params['reasoning_effort'] = self.reasoning_effort
				# Reasoning models reject sampling params. Fixed: use pop() with a
				# default — the keys are absent when temperature/frequency_penalty
				# is None, and the previous bare `del` raised KeyError in that case.
				model_params.pop('temperature', None)
				model_params.pop('frequency_penalty', None)

			if output_format is None:
				# Return string response
				response = await self.get_client().chat.completions.create(
					model=self.model,
					messages=openai_messages,
					**model_params,
				)

				usage = self._get_usage(response)
				return ChatInvokeCompletion(
					completion=response.choices[0].message.content or '',
					usage=usage,
					stop_reason=response.choices[0].finish_reason if response.choices else None,
				)

			else:
				response_format: JSONSchema = {
					'name': 'agent_output',
					'strict': True,
					'schema': SchemaOptimizer.create_optimized_json_schema(output_format),
				}

				# Add JSON schema to system prompt if requested
				if self.add_schema_to_system_prompt and openai_messages and openai_messages[0]['role'] == 'system':
					schema_text = f'\n<json_schema>\n{response_format}\n</json_schema>'
					if isinstance(openai_messages[0]['content'], str):
						openai_messages[0]['content'] += schema_text
					elif isinstance(openai_messages[0]['content'], Iterable):
						openai_messages[0]['content'] = list(openai_messages[0]['content']) + [
							ChatCompletionContentPartTextParam(text=schema_text, type='text')
						]

				if self.dont_force_structured_output:
					response = await self.get_client().chat.completions.create(
						model=self.model,
						messages=openai_messages,
						**model_params,
					)
				else:
					# Return structured response
					response = await self.get_client().chat.completions.create(
						model=self.model,
						messages=openai_messages,
						response_format=ResponseFormatJSONSchema(json_schema=response_format, type='json_schema'),
						**model_params,
					)

				if response.choices[0].message.content is None:
					raise ModelProviderError(
						message='Failed to parse structured output from model response',
						status_code=500,
						model=self.name,
					)

				usage = self._get_usage(response)

				parsed = output_format.model_validate_json(response.choices[0].message.content)

				return ChatInvokeCompletion(
					completion=parsed,
					usage=usage,
					stop_reason=response.choices[0].finish_reason if response.choices else None,
				)

		except RateLimitError as e:
			raise ModelRateLimitError(message=e.message, model=self.name) from e

		except APIConnectionError as e:
			raise ModelProviderError(message=str(e), model=self.name) from e

		except APIStatusError as e:
			raise ModelProviderError(message=e.message, status_code=e.status_code, model=self.name) from e

		except Exception as e:
			raise ModelProviderError(message=str(e), model=self.name) from e
@@ -0,0 +1,15 @@
1
+ from dataclasses import dataclass
2
+
3
+ from browser_use.llm.openai.chat import ChatOpenAI
4
+
5
+
6
@dataclass
class ChatOpenAILike(ChatOpenAI):
	"""
	Talk to any provider that exposes the OpenAI API schema.

	Args:
		model (str): The name of the model to request from the provider.
	"""

	model: str
@@ -0,0 +1,165 @@
1
+ from typing import overload
2
+
3
+ from openai.types.chat import (
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionContentPartImageParam,
6
+ ChatCompletionContentPartRefusalParam,
7
+ ChatCompletionContentPartTextParam,
8
+ ChatCompletionMessageFunctionToolCallParam,
9
+ ChatCompletionMessageParam,
10
+ ChatCompletionSystemMessageParam,
11
+ ChatCompletionUserMessageParam,
12
+ )
13
+ from openai.types.chat.chat_completion_content_part_image_param import ImageURL
14
+ from openai.types.chat.chat_completion_message_function_tool_call_param import Function
15
+
16
+ from browser_use.llm.messages import (
17
+ AssistantMessage,
18
+ BaseMessage,
19
+ ContentPartImageParam,
20
+ ContentPartRefusalParam,
21
+ ContentPartTextParam,
22
+ SystemMessage,
23
+ ToolCall,
24
+ UserMessage,
25
+ )
26
+
27
+
28
class OpenAIMessageSerializer:
	"""Translates browser-use message objects into OpenAI chat-completion params."""

	@staticmethod
	def _serialize_content_part_text(part: ContentPartTextParam) -> ChatCompletionContentPartTextParam:
		return ChatCompletionContentPartTextParam(type='text', text=part.text)

	@staticmethod
	def _serialize_content_part_image(part: ContentPartImageParam) -> ChatCompletionContentPartImageParam:
		image = ImageURL(url=part.image_url.url, detail=part.image_url.detail)
		return ChatCompletionContentPartImageParam(type='image_url', image_url=image)

	@staticmethod
	def _serialize_content_part_refusal(part: ContentPartRefusalParam) -> ChatCompletionContentPartRefusalParam:
		return ChatCompletionContentPartRefusalParam(type='refusal', refusal=part.refusal)

	@staticmethod
	def _serialize_user_content(
		content: str | list[ContentPartTextParam | ContentPartImageParam],
	) -> str | list[ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam]:
		"""Serialize user-message content (text and images allowed)."""
		if isinstance(content, str):
			return content

		parts: list[ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam] = []
		for piece in content:
			if piece.type == 'text':
				parts.append(OpenAIMessageSerializer._serialize_content_part_text(piece))
			elif piece.type == 'image_url':
				parts.append(OpenAIMessageSerializer._serialize_content_part_image(piece))
		return parts

	@staticmethod
	def _serialize_system_content(
		content: str | list[ContentPartTextParam],
	) -> str | list[ChatCompletionContentPartTextParam]:
		"""Serialize system-message content (text only)."""
		if isinstance(content, str):
			return content

		parts: list[ChatCompletionContentPartTextParam] = []
		for piece in content:
			if piece.type == 'text':
				parts.append(OpenAIMessageSerializer._serialize_content_part_text(piece))
		return parts

	@staticmethod
	def _serialize_assistant_content(
		content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None,
	) -> str | list[ChatCompletionContentPartTextParam | ChatCompletionContentPartRefusalParam] | None:
		"""Serialize assistant-message content (text and refusal allowed)."""
		if content is None:
			return None
		if isinstance(content, str):
			return content

		parts: list[ChatCompletionContentPartTextParam | ChatCompletionContentPartRefusalParam] = []
		for piece in content:
			if piece.type == 'text':
				parts.append(OpenAIMessageSerializer._serialize_content_part_text(piece))
			elif piece.type == 'refusal':
				parts.append(OpenAIMessageSerializer._serialize_content_part_refusal(piece))
		return parts

	@staticmethod
	def _serialize_tool_call(tool_call: ToolCall) -> ChatCompletionMessageFunctionToolCallParam:
		return ChatCompletionMessageFunctionToolCallParam(
			type='function',
			id=tool_call.id,
			function=Function(name=tool_call.function.name, arguments=tool_call.function.arguments),
		)

	# region - Serialize overloads
	@overload
	@staticmethod
	def serialize(message: UserMessage) -> ChatCompletionUserMessageParam: ...

	@overload
	@staticmethod
	def serialize(message: SystemMessage) -> ChatCompletionSystemMessageParam: ...

	@overload
	@staticmethod
	def serialize(message: AssistantMessage) -> ChatCompletionAssistantMessageParam: ...

	@staticmethod
	def serialize(message: BaseMessage) -> ChatCompletionMessageParam:
		"""Serialize one browser-use message into an OpenAI message param."""

		if isinstance(message, UserMessage):
			user_param: ChatCompletionUserMessageParam = {
				'role': 'user',
				'content': OpenAIMessageSerializer._serialize_user_content(message.content),
			}
			if message.name is not None:
				user_param['name'] = message.name
			return user_param

		if isinstance(message, SystemMessage):
			system_param: ChatCompletionSystemMessageParam = {
				'role': 'system',
				'content': OpenAIMessageSerializer._serialize_system_content(message.content),
			}
			if message.name is not None:
				system_param['name'] = message.name
			return system_param

		if isinstance(message, AssistantMessage):
			assistant_param: ChatCompletionAssistantMessageParam = {'role': 'assistant'}

			# Content is only attached when present (OpenAI allows omitting it).
			serialized_content = None
			if message.content is not None:
				serialized_content = OpenAIMessageSerializer._serialize_assistant_content(message.content)
			if serialized_content is not None:
				assistant_param['content'] = serialized_content

			if message.name is not None:
				assistant_param['name'] = message.name
			if message.refusal is not None:
				assistant_param['refusal'] = message.refusal
			if message.tool_calls:
				assistant_param['tool_calls'] = [
					OpenAIMessageSerializer._serialize_tool_call(tc) for tc in message.tool_calls
				]

			return assistant_param

		raise ValueError(f'Unknown message type: {type(message)}')

	@staticmethod
	def serialize_messages(messages: list[BaseMessage]) -> list[ChatCompletionMessageParam]:
		return [OpenAIMessageSerializer.serialize(m) for m in messages]