optexity_browser_use-0.9.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0

browser_use/llm/ollama/chat.py (new file)
@@ -0,0 +1,97 @@
```python
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, TypeVar, overload

import httpx
from ollama import AsyncClient as OllamaAsyncClient
from ollama import Options
from pydantic import BaseModel

from browser_use.llm.base import BaseChatModel
from browser_use.llm.exceptions import ModelProviderError
from browser_use.llm.messages import BaseMessage
from browser_use.llm.ollama.serializer import OllamaMessageSerializer
from browser_use.llm.views import ChatInvokeCompletion

T = TypeVar('T', bound=BaseModel)


@dataclass
class ChatOllama(BaseChatModel):
    """
    A wrapper around Ollama's chat model.
    """

    model: str

    # # Model params
    # TODO (matic): Why is this commented out?
    # temperature: float | None = None

    # Client initialization parameters
    host: str | None = None
    timeout: float | httpx.Timeout | None = None
    client_params: dict[str, Any] | None = None
    ollama_options: Mapping[str, Any] | Options | None = None

    # Static
    @property
    def provider(self) -> str:
        return 'ollama'

    def _get_client_params(self) -> dict[str, Any]:
        """Prepare client parameters dictionary."""
        return {
            'host': self.host,
            'timeout': self.timeout,
            'client_params': self.client_params,
        }

    def get_client(self) -> OllamaAsyncClient:
        """
        Returns an OllamaAsyncClient client.
        """
        return OllamaAsyncClient(host=self.host, timeout=self.timeout, **self.client_params or {})

    @property
    def name(self) -> str:
        return self.model

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

    async def ainvoke(
        self, messages: list[BaseMessage], output_format: type[T] | None = None
    ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
        ollama_messages = OllamaMessageSerializer.serialize_messages(messages)

        try:
            if output_format is None:
                response = await self.get_client().chat(
                    model=self.model,
                    messages=ollama_messages,
                    options=self.ollama_options,
                )

                return ChatInvokeCompletion(completion=response.message.content or '', usage=None)
            else:
                schema = output_format.model_json_schema()

                response = await self.get_client().chat(
                    model=self.model,
                    messages=ollama_messages,
                    format=schema,
                    options=self.ollama_options,
                )

                completion = response.message.content or ''
                if output_format is not None:
                    completion = output_format.model_validate_json(completion)

                return ChatInvokeCompletion(completion=completion, usage=None)

        except Exception as e:
            raise ModelProviderError(message=str(e), model=self.name) from e
```
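
For orientation, here is a minimal usage sketch of the `ChatOllama` wrapper above. It is not part of the packaged code: the model name, host, and the `UserMessage(content=...)` constructor are assumptions made for illustration.

```python
# Hypothetical usage sketch, not from the package diff.
# Assumes a local Ollama server and a pydantic-style UserMessage constructor.
import asyncio

from browser_use.llm.messages import UserMessage
from browser_use.llm.ollama.chat import ChatOllama


async def main() -> None:
    llm = ChatOllama(model='llama3.1', host='http://localhost:11434')
    # Without output_format, ainvoke resolves to ChatInvokeCompletion[str]
    result = await llm.ainvoke([UserMessage(content='Reply with a single word.')])
    print(result.completion)


asyncio.run(main())
```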

browser_use/llm/ollama/serializer.py (new file)
@@ -0,0 +1,143 @@
```python
import base64
import json
from typing import Any, overload

from ollama._types import Image, Message

from browser_use.llm.messages import (
    AssistantMessage,
    BaseMessage,
    SystemMessage,
    ToolCall,
    UserMessage,
)


class OllamaMessageSerializer:
    """Serializer for converting between custom message types and Ollama message types."""

    @staticmethod
    def _extract_text_content(content: Any) -> str:
        """Extract text content from message content, ignoring images."""
        if content is None:
            return ''
        if isinstance(content, str):
            return content

        text_parts: list[str] = []
        for part in content:
            if hasattr(part, 'type'):
                if part.type == 'text':
                    text_parts.append(part.text)
                elif part.type == 'refusal':
                    text_parts.append(f'[Refusal] {part.refusal}')
            # Skip image parts as they're handled separately

        return '\n'.join(text_parts)

    @staticmethod
    def _extract_images(content: Any) -> list[Image]:
        """Extract images from message content."""
        if content is None or isinstance(content, str):
            return []

        images: list[Image] = []
        for part in content:
            if hasattr(part, 'type') and part.type == 'image_url':
                url = part.image_url.url
                if url.startswith('data:'):
                    # Handle base64 encoded images
                    # Format: data:image/jpeg;base64,<data>
                    _, data = url.split(',', 1)
                    # Decode base64 to bytes
                    image_bytes = base64.b64decode(data)
                    images.append(Image(value=image_bytes))
                else:
                    # Handle URL images (Ollama will download them)
                    images.append(Image(value=url))

        return images

    @staticmethod
    def _serialize_tool_calls(tool_calls: list[ToolCall]) -> list[Message.ToolCall]:
        """Convert browser-use ToolCalls to Ollama ToolCalls."""
        ollama_tool_calls: list[Message.ToolCall] = []

        for tool_call in tool_calls:
            # Parse arguments from JSON string to dict for Ollama
            try:
                arguments_dict = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError:
                # If parsing fails, wrap in a dict
                arguments_dict = {'arguments': tool_call.function.arguments}

            ollama_tool_call = Message.ToolCall(
                function=Message.ToolCall.Function(name=tool_call.function.name, arguments=arguments_dict)
            )
            ollama_tool_calls.append(ollama_tool_call)

        return ollama_tool_calls

    # region - Serialize overloads
    @overload
    @staticmethod
    def serialize(message: UserMessage) -> Message: ...

    @overload
    @staticmethod
    def serialize(message: SystemMessage) -> Message: ...

    @overload
    @staticmethod
    def serialize(message: AssistantMessage) -> Message: ...

    @staticmethod
    def serialize(message: BaseMessage) -> Message:
        """Serialize a custom message to an Ollama Message."""

        if isinstance(message, UserMessage):
            text_content = OllamaMessageSerializer._extract_text_content(message.content)
            images = OllamaMessageSerializer._extract_images(message.content)

            ollama_message = Message(
                role='user',
                content=text_content if text_content else None,
            )

            if images:
                ollama_message.images = images

            return ollama_message

        elif isinstance(message, SystemMessage):
            text_content = OllamaMessageSerializer._extract_text_content(message.content)

            return Message(
                role='system',
                content=text_content if text_content else None,
            )

        elif isinstance(message, AssistantMessage):
            # Handle content
            text_content = None
            if message.content is not None:
                text_content = OllamaMessageSerializer._extract_text_content(message.content)

            ollama_message = Message(
                role='assistant',
                content=text_content if text_content else None,
            )

            # Handle tool calls
            if message.tool_calls:
                ollama_message.tool_calls = OllamaMessageSerializer._serialize_tool_calls(message.tool_calls)

            return ollama_message

        else:
            raise ValueError(f'Unknown message type: {type(message)}')

    @staticmethod
    def serialize_messages(messages: list[BaseMessage]) -> list[Message]:
        """Serialize a list of browser_use messages to Ollama Messages."""
        return [OllamaMessageSerializer.serialize(m) for m in messages]
```
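
One detail worth noting in the serializer above: image parts arrive either as `data:` URLs or as plain http(s) URLs, and only the former are decoded to raw bytes (Ollama downloads plain URLs itself). A standalone sketch of that decoding step, with placeholder bytes:

```python
# Standalone illustration of the data-URL branch in _extract_images above;
# the PNG payload here is placeholder data.
import base64

data_url = 'data:image/png;base64,' + base64.b64encode(b'\x89PNG placeholder').decode()

# Split off the 'data:image/png;base64,' prefix and recover the raw bytes
# that are handed to ollama._types.Image(value=...).
_, encoded = data_url.split(',', 1)
image_bytes = base64.b64decode(encoded)
assert image_bytes.startswith(b'\x89PNG')
```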

browser_use/llm/openai/chat.py (new file)
@@ -0,0 +1,264 @@
```python
from collections.abc import Iterable, Mapping
from dataclasses import dataclass, field
from typing import Any, Literal, TypeVar, overload

import httpx
from openai import APIConnectionError, APIStatusError, AsyncOpenAI, RateLimitError
from openai.types.chat import ChatCompletionContentPartTextParam
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.shared.chat_model import ChatModel
from openai.types.shared_params.reasoning_effort import ReasoningEffort
from openai.types.shared_params.response_format_json_schema import JSONSchema, ResponseFormatJSONSchema
from pydantic import BaseModel

from browser_use.llm.base import BaseChatModel
from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
from browser_use.llm.messages import BaseMessage
from browser_use.llm.openai.serializer import OpenAIMessageSerializer
from browser_use.llm.schema import SchemaOptimizer
from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage

T = TypeVar('T', bound=BaseModel)


@dataclass
class ChatOpenAI(BaseChatModel):
    """
    A wrapper around AsyncOpenAI that implements the BaseLLM protocol.

    This class accepts all AsyncOpenAI parameters while adding model
    and temperature parameters for the LLM interface (if temperature is not `None`).
    """

    # Model configuration
    model: ChatModel | str

    # Model params
    temperature: float | None = 0.2
    frequency_penalty: float | None = 0.3  # this avoids infinite generation of \t for models like 4.1-mini
    reasoning_effort: ReasoningEffort = 'low'
    seed: int | None = None
    service_tier: Literal['auto', 'default', 'flex', 'priority', 'scale'] | None = None
    top_p: float | None = None
    add_schema_to_system_prompt: bool = False  # Add JSON schema to system prompt instead of using response_format
    dont_force_structured_output: bool = False  # If True, the model will not be forced to output a structured output

    # Client initialization parameters
    api_key: str | None = None
    organization: str | None = None
    project: str | None = None
    base_url: str | httpx.URL | None = None
    websocket_base_url: str | httpx.URL | None = None
    timeout: float | httpx.Timeout | None = None
    max_retries: int = 5  # Increase default retries for automation reliability
    default_headers: Mapping[str, str] | None = None
    default_query: Mapping[str, object] | None = None
    http_client: httpx.AsyncClient | None = None
    _strict_response_validation: bool = False
    max_completion_tokens: int | None = 4096
    reasoning_models: list[ChatModel | str] | None = field(
        default_factory=lambda: [
            'o4-mini',
            'o3',
            'o3-mini',
            'o1',
            'o1-pro',
            'o3-pro',
            'gpt-5',
            'gpt-5-mini',
            'gpt-5-nano',
        ]
    )

    # Static
    @property
    def provider(self) -> str:
        return 'openai'

    def _get_client_params(self) -> dict[str, Any]:
        """Prepare client parameters dictionary."""
        # Define base client params
        base_params = {
            'api_key': self.api_key,
            'organization': self.organization,
            'project': self.project,
            'base_url': self.base_url,
            'websocket_base_url': self.websocket_base_url,
            'timeout': self.timeout,
            'max_retries': self.max_retries,
            'default_headers': self.default_headers,
            'default_query': self.default_query,
            '_strict_response_validation': self._strict_response_validation,
        }

        # Create client_params dict with non-None values
        client_params = {k: v for k, v in base_params.items() if v is not None}

        # Add http_client if provided
        if self.http_client is not None:
            client_params['http_client'] = self.http_client

        return client_params

    def get_client(self) -> AsyncOpenAI:
        """
        Returns an AsyncOpenAI client.

        Returns:
            AsyncOpenAI: An instance of the AsyncOpenAI client.
        """
        client_params = self._get_client_params()
        return AsyncOpenAI(**client_params)

    @property
    def name(self) -> str:
        return str(self.model)

    def _get_usage(self, response: ChatCompletion) -> ChatInvokeUsage | None:
        if response.usage is not None:
            completion_tokens = response.usage.completion_tokens
            completion_token_details = response.usage.completion_tokens_details
            if completion_token_details is not None:
                reasoning_tokens = completion_token_details.reasoning_tokens
                if reasoning_tokens is not None:
                    completion_tokens += reasoning_tokens

            usage = ChatInvokeUsage(
                prompt_tokens=response.usage.prompt_tokens,
                prompt_cached_tokens=response.usage.prompt_tokens_details.cached_tokens
                if response.usage.prompt_tokens_details is not None
                else None,
                prompt_cache_creation_tokens=None,
                prompt_image_tokens=None,
                # Completion
                completion_tokens=completion_tokens,
                total_tokens=response.usage.total_tokens,
            )
        else:
            usage = None

        return usage

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

    async def ainvoke(
        self, messages: list[BaseMessage], output_format: type[T] | None = None
    ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
        """
        Invoke the model with the given messages.

        Args:
            messages: List of chat messages
            output_format: Optional Pydantic model class for structured output

        Returns:
            Either a string response or an instance of output_format
        """

        openai_messages = OpenAIMessageSerializer.serialize_messages(messages)

        try:
            model_params: dict[str, Any] = {}

            if self.temperature is not None:
                model_params['temperature'] = self.temperature

            if self.frequency_penalty is not None:
                model_params['frequency_penalty'] = self.frequency_penalty

            if self.max_completion_tokens is not None:
                model_params['max_completion_tokens'] = self.max_completion_tokens

            if self.top_p is not None:
                model_params['top_p'] = self.top_p

            if self.seed is not None:
                model_params['seed'] = self.seed

            if self.service_tier is not None:
                model_params['service_tier'] = self.service_tier

            if self.reasoning_models and any(str(m).lower() in str(self.model).lower() for m in self.reasoning_models):
                model_params['reasoning_effort'] = self.reasoning_effort
                # Reasoning models reject sampling params; pop() guards against
                # a KeyError when temperature/frequency_penalty were set to None.
                model_params.pop('temperature', None)
                model_params.pop('frequency_penalty', None)

            if output_format is None:
                # Return string response
                response = await self.get_client().chat.completions.create(
                    model=self.model,
                    messages=openai_messages,
                    **model_params,
                )

                usage = self._get_usage(response)
                return ChatInvokeCompletion(
                    completion=response.choices[0].message.content or '',
                    usage=usage,
                    stop_reason=response.choices[0].finish_reason if response.choices else None,
                )

            else:
                response_format: JSONSchema = {
                    'name': 'agent_output',
                    'strict': True,
                    'schema': SchemaOptimizer.create_optimized_json_schema(output_format),
                }

                # Add JSON schema to system prompt if requested
                if self.add_schema_to_system_prompt and openai_messages and openai_messages[0]['role'] == 'system':
                    schema_text = f'\n<json_schema>\n{response_format}\n</json_schema>'
                    if isinstance(openai_messages[0]['content'], str):
                        openai_messages[0]['content'] += schema_text
                    elif isinstance(openai_messages[0]['content'], Iterable):
                        openai_messages[0]['content'] = list(openai_messages[0]['content']) + [
                            ChatCompletionContentPartTextParam(text=schema_text, type='text')
                        ]

                if self.dont_force_structured_output:
                    response = await self.get_client().chat.completions.create(
                        model=self.model,
                        messages=openai_messages,
                        **model_params,
                    )
                else:
                    # Return structured response
                    response = await self.get_client().chat.completions.create(
                        model=self.model,
                        messages=openai_messages,
                        response_format=ResponseFormatJSONSchema(json_schema=response_format, type='json_schema'),
                        **model_params,
                    )

                if response.choices[0].message.content is None:
                    raise ModelProviderError(
                        message='Failed to parse structured output from model response',
                        status_code=500,
                        model=self.name,
                    )

                usage = self._get_usage(response)

                parsed = output_format.model_validate_json(response.choices[0].message.content)

                return ChatInvokeCompletion(
                    completion=parsed,
                    usage=usage,
                    stop_reason=response.choices[0].finish_reason if response.choices else None,
                )

        except RateLimitError as e:
            raise ModelRateLimitError(message=e.message, model=self.name) from e

        except APIConnectionError as e:
            raise ModelProviderError(message=str(e), model=self.name) from e

        except APIStatusError as e:
            raise ModelProviderError(message=e.message, status_code=e.status_code, model=self.name) from e

        except Exception as e:
            raise ModelProviderError(message=str(e), model=self.name) from e
```
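
A minimal structured-output sketch for the `ChatOpenAI` wrapper above (illustrative only: the model name and schema are placeholders, and the API key is picked up from the environment by the underlying `AsyncOpenAI` client when `api_key` is left unset):

```python
# Hypothetical usage sketch, not from the package diff.
import asyncio

from pydantic import BaseModel

from browser_use.llm.messages import UserMessage
from browser_use.llm.openai.chat import ChatOpenAI


class CityInfo(BaseModel):
    city: str
    country: str


async def main() -> None:
    llm = ChatOpenAI(model='gpt-4.1-mini')
    # Passing output_format makes ainvoke request a strict JSON-schema
    # response and parse it back into a CityInfo instance.
    result = await llm.ainvoke(
        [UserMessage(content='Which city hosts the Eiffel Tower?')],
        output_format=CityInfo,
    )
    print(result.completion.city, result.completion.country)


asyncio.run(main())
```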

browser_use/llm/openai/like.py (new file)
@@ -0,0 +1,15 @@
```python
from dataclasses import dataclass

from browser_use.llm.openai.chat import ChatOpenAI


@dataclass
class ChatOpenAILike(ChatOpenAI):
    """
    A class to interact with any provider using the OpenAI API schema.

    Args:
        model (str): The name of the OpenAI model to use.
    """

    model: str
```
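
Since `ChatOpenAILike` adds nothing beyond loosening `model` to any string, its practical use is pointing `base_url` at a non-OpenAI endpoint that speaks the same API. A sketch, where the endpoint, key, and model name are placeholders:

```python
# Hypothetical sketch for any OpenAI-compatible server (vLLM, LiteLLM, etc.).
from browser_use.llm.openai.like import ChatOpenAILike

llm = ChatOpenAILike(
    model='qwen2.5-7b-instruct',
    base_url='http://localhost:8000/v1',
    api_key='unused-for-local-servers',
)
# llm.ainvoke(...) then behaves exactly like ChatOpenAI against that endpoint.
```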

browser_use/llm/openai/serializer.py (new file)
@@ -0,0 +1,165 @@
```python
from typing import overload

from openai.types.chat import (
    ChatCompletionAssistantMessageParam,
    ChatCompletionContentPartImageParam,
    ChatCompletionContentPartRefusalParam,
    ChatCompletionContentPartTextParam,
    ChatCompletionMessageFunctionToolCallParam,
    ChatCompletionMessageParam,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)
from openai.types.chat.chat_completion_content_part_image_param import ImageURL
from openai.types.chat.chat_completion_message_function_tool_call_param import Function

from browser_use.llm.messages import (
    AssistantMessage,
    BaseMessage,
    ContentPartImageParam,
    ContentPartRefusalParam,
    ContentPartTextParam,
    SystemMessage,
    ToolCall,
    UserMessage,
)


class OpenAIMessageSerializer:
    """Serializer for converting between custom message types and OpenAI message param types."""

    @staticmethod
    def _serialize_content_part_text(part: ContentPartTextParam) -> ChatCompletionContentPartTextParam:
        return ChatCompletionContentPartTextParam(text=part.text, type='text')

    @staticmethod
    def _serialize_content_part_image(part: ContentPartImageParam) -> ChatCompletionContentPartImageParam:
        return ChatCompletionContentPartImageParam(
            image_url=ImageURL(url=part.image_url.url, detail=part.image_url.detail),
            type='image_url',
        )

    @staticmethod
    def _serialize_content_part_refusal(part: ContentPartRefusalParam) -> ChatCompletionContentPartRefusalParam:
        return ChatCompletionContentPartRefusalParam(refusal=part.refusal, type='refusal')

    @staticmethod
    def _serialize_user_content(
        content: str | list[ContentPartTextParam | ContentPartImageParam],
    ) -> str | list[ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam]:
        """Serialize content for user messages (text and images allowed)."""
        if isinstance(content, str):
            return content

        serialized_parts: list[ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam] = []
        for part in content:
            if part.type == 'text':
                serialized_parts.append(OpenAIMessageSerializer._serialize_content_part_text(part))
            elif part.type == 'image_url':
                serialized_parts.append(OpenAIMessageSerializer._serialize_content_part_image(part))
        return serialized_parts

    @staticmethod
    def _serialize_system_content(
        content: str | list[ContentPartTextParam],
    ) -> str | list[ChatCompletionContentPartTextParam]:
        """Serialize content for system messages (text only)."""
        if isinstance(content, str):
            return content

        serialized_parts: list[ChatCompletionContentPartTextParam] = []
        for part in content:
            if part.type == 'text':
                serialized_parts.append(OpenAIMessageSerializer._serialize_content_part_text(part))
        return serialized_parts

    @staticmethod
    def _serialize_assistant_content(
        content: str | list[ContentPartTextParam | ContentPartRefusalParam] | None,
    ) -> str | list[ChatCompletionContentPartTextParam | ChatCompletionContentPartRefusalParam] | None:
        """Serialize content for assistant messages (text and refusal allowed)."""
        if content is None:
            return None
        if isinstance(content, str):
            return content

        serialized_parts: list[ChatCompletionContentPartTextParam | ChatCompletionContentPartRefusalParam] = []
        for part in content:
            if part.type == 'text':
                serialized_parts.append(OpenAIMessageSerializer._serialize_content_part_text(part))
            elif part.type == 'refusal':
                serialized_parts.append(OpenAIMessageSerializer._serialize_content_part_refusal(part))
        return serialized_parts

    @staticmethod
    def _serialize_tool_call(tool_call: ToolCall) -> ChatCompletionMessageFunctionToolCallParam:
        return ChatCompletionMessageFunctionToolCallParam(
            id=tool_call.id,
            function=Function(name=tool_call.function.name, arguments=tool_call.function.arguments),
            type='function',
        )

    # endregion

    # region - Serialize overloads
    @overload
    @staticmethod
    def serialize(message: UserMessage) -> ChatCompletionUserMessageParam: ...

    @overload
    @staticmethod
    def serialize(message: SystemMessage) -> ChatCompletionSystemMessageParam: ...

    @overload
    @staticmethod
    def serialize(message: AssistantMessage) -> ChatCompletionAssistantMessageParam: ...

    @staticmethod
    def serialize(message: BaseMessage) -> ChatCompletionMessageParam:
        """Serialize a custom message to an OpenAI message param."""

        if isinstance(message, UserMessage):
            user_result: ChatCompletionUserMessageParam = {
                'role': 'user',
                'content': OpenAIMessageSerializer._serialize_user_content(message.content),
            }
            if message.name is not None:
                user_result['name'] = message.name
            return user_result

        elif isinstance(message, SystemMessage):
            system_result: ChatCompletionSystemMessageParam = {
                'role': 'system',
                'content': OpenAIMessageSerializer._serialize_system_content(message.content),
            }
            if message.name is not None:
                system_result['name'] = message.name
            return system_result

        elif isinstance(message, AssistantMessage):
            # Handle content serialization
            content = None
            if message.content is not None:
                content = OpenAIMessageSerializer._serialize_assistant_content(message.content)

            assistant_result: ChatCompletionAssistantMessageParam = {'role': 'assistant'}

            # Only add content if it's not None
            if content is not None:
                assistant_result['content'] = content

            if message.name is not None:
                assistant_result['name'] = message.name
            if message.refusal is not None:
                assistant_result['refusal'] = message.refusal
            if message.tool_calls:
                assistant_result['tool_calls'] = [OpenAIMessageSerializer._serialize_tool_call(tc) for tc in message.tool_calls]

            return assistant_result

        else:
            raise ValueError(f'Unknown message type: {type(message)}')

    @staticmethod
    def serialize_messages(messages: list[BaseMessage]) -> list[ChatCompletionMessageParam]:
        return [OpenAIMessageSerializer.serialize(m) for m in messages]
```
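
Finally, a small sketch of what the serializer above produces, again assuming the pydantic-style `UserMessage` constructor implied by its attribute access:

```python
# Hypothetical round-trip sketch, not from the package diff.
from browser_use.llm.messages import UserMessage
from browser_use.llm.openai.serializer import OpenAIMessageSerializer

params = OpenAIMessageSerializer.serialize_messages([UserMessage(content='hello')])
# Expected shape: [{'role': 'user', 'content': 'hello'}] - directly usable as
# the `messages` argument of AsyncOpenAI().chat.completions.create(...).
print(params)
```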