optexity_browser_use-0.9.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browser_use/__init__.py +157 -0
- browser_use/actor/__init__.py +11 -0
- browser_use/actor/element.py +1175 -0
- browser_use/actor/mouse.py +134 -0
- browser_use/actor/page.py +561 -0
- browser_use/actor/playground/flights.py +41 -0
- browser_use/actor/playground/mixed_automation.py +54 -0
- browser_use/actor/playground/playground.py +236 -0
- browser_use/actor/utils.py +176 -0
- browser_use/agent/cloud_events.py +282 -0
- browser_use/agent/gif.py +424 -0
- browser_use/agent/judge.py +170 -0
- browser_use/agent/message_manager/service.py +473 -0
- browser_use/agent/message_manager/utils.py +52 -0
- browser_use/agent/message_manager/views.py +98 -0
- browser_use/agent/prompts.py +413 -0
- browser_use/agent/service.py +2316 -0
- browser_use/agent/system_prompt.md +185 -0
- browser_use/agent/system_prompt_flash.md +10 -0
- browser_use/agent/system_prompt_no_thinking.md +183 -0
- browser_use/agent/views.py +743 -0
- browser_use/browser/__init__.py +41 -0
- browser_use/browser/cloud/cloud.py +203 -0
- browser_use/browser/cloud/views.py +89 -0
- browser_use/browser/events.py +578 -0
- browser_use/browser/profile.py +1158 -0
- browser_use/browser/python_highlights.py +548 -0
- browser_use/browser/session.py +3225 -0
- browser_use/browser/session_manager.py +399 -0
- browser_use/browser/video_recorder.py +162 -0
- browser_use/browser/views.py +200 -0
- browser_use/browser/watchdog_base.py +260 -0
- browser_use/browser/watchdogs/__init__.py +0 -0
- browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
- browser_use/browser/watchdogs/crash_watchdog.py +335 -0
- browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
- browser_use/browser/watchdogs/dom_watchdog.py +817 -0
- browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
- browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
- browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
- browser_use/browser/watchdogs/popups_watchdog.py +143 -0
- browser_use/browser/watchdogs/recording_watchdog.py +126 -0
- browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
- browser_use/browser/watchdogs/security_watchdog.py +280 -0
- browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
- browser_use/cli.py +2359 -0
- browser_use/code_use/__init__.py +16 -0
- browser_use/code_use/formatting.py +192 -0
- browser_use/code_use/namespace.py +665 -0
- browser_use/code_use/notebook_export.py +276 -0
- browser_use/code_use/service.py +1340 -0
- browser_use/code_use/system_prompt.md +574 -0
- browser_use/code_use/utils.py +150 -0
- browser_use/code_use/views.py +171 -0
- browser_use/config.py +505 -0
- browser_use/controller/__init__.py +3 -0
- browser_use/dom/enhanced_snapshot.py +161 -0
- browser_use/dom/markdown_extractor.py +169 -0
- browser_use/dom/playground/extraction.py +312 -0
- browser_use/dom/playground/multi_act.py +32 -0
- browser_use/dom/serializer/clickable_elements.py +200 -0
- browser_use/dom/serializer/code_use_serializer.py +287 -0
- browser_use/dom/serializer/eval_serializer.py +478 -0
- browser_use/dom/serializer/html_serializer.py +212 -0
- browser_use/dom/serializer/paint_order.py +197 -0
- browser_use/dom/serializer/serializer.py +1170 -0
- browser_use/dom/service.py +825 -0
- browser_use/dom/utils.py +129 -0
- browser_use/dom/views.py +906 -0
- browser_use/exceptions.py +5 -0
- browser_use/filesystem/__init__.py +0 -0
- browser_use/filesystem/file_system.py +619 -0
- browser_use/init_cmd.py +376 -0
- browser_use/integrations/gmail/__init__.py +24 -0
- browser_use/integrations/gmail/actions.py +115 -0
- browser_use/integrations/gmail/service.py +225 -0
- browser_use/llm/__init__.py +155 -0
- browser_use/llm/anthropic/chat.py +242 -0
- browser_use/llm/anthropic/serializer.py +312 -0
- browser_use/llm/aws/__init__.py +36 -0
- browser_use/llm/aws/chat_anthropic.py +242 -0
- browser_use/llm/aws/chat_bedrock.py +289 -0
- browser_use/llm/aws/serializer.py +257 -0
- browser_use/llm/azure/chat.py +91 -0
- browser_use/llm/base.py +57 -0
- browser_use/llm/browser_use/__init__.py +3 -0
- browser_use/llm/browser_use/chat.py +201 -0
- browser_use/llm/cerebras/chat.py +193 -0
- browser_use/llm/cerebras/serializer.py +109 -0
- browser_use/llm/deepseek/chat.py +212 -0
- browser_use/llm/deepseek/serializer.py +109 -0
- browser_use/llm/exceptions.py +29 -0
- browser_use/llm/google/__init__.py +3 -0
- browser_use/llm/google/chat.py +542 -0
- browser_use/llm/google/serializer.py +120 -0
- browser_use/llm/groq/chat.py +229 -0
- browser_use/llm/groq/parser.py +158 -0
- browser_use/llm/groq/serializer.py +159 -0
- browser_use/llm/messages.py +238 -0
- browser_use/llm/models.py +271 -0
- browser_use/llm/oci_raw/__init__.py +10 -0
- browser_use/llm/oci_raw/chat.py +443 -0
- browser_use/llm/oci_raw/serializer.py +229 -0
- browser_use/llm/ollama/chat.py +97 -0
- browser_use/llm/ollama/serializer.py +143 -0
- browser_use/llm/openai/chat.py +264 -0
- browser_use/llm/openai/like.py +15 -0
- browser_use/llm/openai/serializer.py +165 -0
- browser_use/llm/openrouter/chat.py +211 -0
- browser_use/llm/openrouter/serializer.py +26 -0
- browser_use/llm/schema.py +176 -0
- browser_use/llm/views.py +48 -0
- browser_use/logging_config.py +330 -0
- browser_use/mcp/__init__.py +18 -0
- browser_use/mcp/__main__.py +12 -0
- browser_use/mcp/client.py +544 -0
- browser_use/mcp/controller.py +264 -0
- browser_use/mcp/server.py +1114 -0
- browser_use/observability.py +204 -0
- browser_use/py.typed +0 -0
- browser_use/sandbox/__init__.py +41 -0
- browser_use/sandbox/sandbox.py +637 -0
- browser_use/sandbox/views.py +132 -0
- browser_use/screenshots/__init__.py +1 -0
- browser_use/screenshots/service.py +52 -0
- browser_use/sync/__init__.py +6 -0
- browser_use/sync/auth.py +357 -0
- browser_use/sync/service.py +161 -0
- browser_use/telemetry/__init__.py +51 -0
- browser_use/telemetry/service.py +112 -0
- browser_use/telemetry/views.py +101 -0
- browser_use/tokens/__init__.py +0 -0
- browser_use/tokens/custom_pricing.py +24 -0
- browser_use/tokens/mappings.py +4 -0
- browser_use/tokens/service.py +580 -0
- browser_use/tokens/views.py +108 -0
- browser_use/tools/registry/service.py +572 -0
- browser_use/tools/registry/views.py +174 -0
- browser_use/tools/service.py +1675 -0
- browser_use/tools/utils.py +82 -0
- browser_use/tools/views.py +100 -0
- browser_use/utils.py +670 -0
- optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
- optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
- optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
- optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
- optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
browser_use/llm/oci_raw/chat.py
@@ -0,0 +1,443 @@
+"""
+OCI Raw API chat model integration for browser-use.
+
+This module provides direct integration with Oracle Cloud Infrastructure's
+Generative AI service using raw API calls without Langchain dependencies.
+"""
+
+import asyncio
+import json
+from dataclasses import dataclass
+from typing import TypeVar, overload
+
+import oci
+from oci.generative_ai_inference import GenerativeAiInferenceClient
+from oci.generative_ai_inference.models import (
+    BaseChatRequest,
+    ChatDetails,
+    CohereChatRequest,
+    GenericChatRequest,
+    OnDemandServingMode,
+)
+from pydantic import BaseModel
+
+from browser_use.llm.base import BaseChatModel
+from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
+from browser_use.llm.messages import BaseMessage
+from browser_use.llm.schema import SchemaOptimizer
+from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
+
+from .serializer import OCIRawMessageSerializer
+
+T = TypeVar('T', bound=BaseModel)
+
+
+@dataclass
+class ChatOCIRaw(BaseChatModel):
+    """
+    A direct OCI Raw API integration for browser-use that bypasses Langchain.
+
+    This class provides a browser-use compatible interface for OCI GenAI models
+    using direct API calls to Oracle Cloud Infrastructure.
+
+    Args:
+        model_id: The OCI GenAI model OCID
+        service_endpoint: The OCI service endpoint URL
+        compartment_id: The OCI compartment OCID
+        provider: The model provider (e.g., "meta", "cohere", "xai")
+        temperature: Temperature for response generation (0.0-2.0) - supported by all providers
+        max_tokens: Maximum tokens in response - supported by all providers
+        frequency_penalty: Frequency penalty for response generation - supported by Meta and Cohere only
+        presence_penalty: Presence penalty for response generation - supported by Meta only
+        top_p: Top-p sampling parameter - supported by all providers
+        top_k: Top-k sampling parameter - supported by Cohere and xAI only
+        auth_type: Authentication type (e.g., "API_KEY")
+        auth_profile: Authentication profile name
+        timeout: Request timeout in seconds
+    """
+
+    # Model configuration
+    model_id: str
+    service_endpoint: str
+    compartment_id: str
+    provider: str = 'meta'
+
+    # Model parameters
+    temperature: float | None = 1.0
+    max_tokens: int | None = 600
+    frequency_penalty: float | None = 0.0
+    presence_penalty: float | None = 0.0
+    top_p: float | None = 0.75
+    top_k: int | None = 0  # Used by Cohere models
+
+    # Authentication
+    auth_type: str = 'API_KEY'
+    auth_profile: str = 'DEFAULT'
+
+    # Client configuration
+    timeout: float = 60.0
+
+    # Static properties
+    @property
+    def provider_name(self) -> str:
+        return 'oci-raw'
+
+    @property
+    def name(self) -> str:
+        # Return a shorter name for telemetry (max 100 chars)
+        if len(self.model_id) > 90:
+            # Extract the model name from the OCID
+            parts = self.model_id.split('.')
+            if len(parts) >= 4:
+                return f'oci-{self.provider}-{parts[3]}'  # e.g., "oci-meta-us-chicago-1"
+            else:
+                return f'oci-{self.provider}-model'
+        return self.model_id
+
+    @property
+    def model(self) -> str:
+        return self.model_id
+
+    @property
+    def model_name(self) -> str:
+        # Override for telemetry - return shorter name (max 100 chars)
+        if len(self.model_id) > 90:
+            # Extract the model name from the OCID
+            parts = self.model_id.split('.')
+            if len(parts) >= 4:
+                return f'oci-{self.provider}-{parts[3]}'  # e.g., "oci-meta-us-chicago-1"
+            else:
+                return f'oci-{self.provider}-model'
+        return self.model_id
+
+    def _uses_cohere_format(self) -> bool:
+        """Check if the provider uses Cohere chat request format."""
+        return self.provider.lower() == 'cohere'
+
+    def _get_supported_parameters(self) -> dict[str, bool]:
+        """Get which parameters are supported by the current provider."""
+        provider = self.provider.lower()
+        if provider == 'meta':
+            return {
+                'temperature': True,
+                'max_tokens': True,
+                'frequency_penalty': True,
+                'presence_penalty': True,
+                'top_p': True,
+                'top_k': False,
+            }
+        elif provider == 'cohere':
+            return {
+                'temperature': True,
+                'max_tokens': True,
+                'frequency_penalty': True,
+                'presence_penalty': False,
+                'top_p': True,
+                'top_k': True,
+            }
+        elif provider == 'xai':
+            return {
+                'temperature': True,
+                'max_tokens': True,
+                'frequency_penalty': False,
+                'presence_penalty': False,
+                'top_p': True,
+                'top_k': True,
+            }
+        else:
+            # Default: assume all parameters are supported
+            return {
+                'temperature': True,
+                'max_tokens': True,
+                'frequency_penalty': True,
+                'presence_penalty': True,
+                'top_p': True,
+                'top_k': True,
+            }
+
+    def _get_oci_client(self) -> GenerativeAiInferenceClient:
+        """Get the OCI GenerativeAiInferenceClient following your working example."""
+        if not hasattr(self, '_client'):
+            # Configure OCI client based on auth_type (following your working example)
+            if self.auth_type == 'API_KEY':
+                config = oci.config.from_file('~/.oci/config', self.auth_profile)
+                self._client = GenerativeAiInferenceClient(
+                    config=config,
+                    service_endpoint=self.service_endpoint,
+                    retry_strategy=oci.retry.NoneRetryStrategy(),
+                    timeout=(10, 240),  # Following your working example
+                )
+            elif self.auth_type == 'INSTANCE_PRINCIPAL':
+                config = {}
+                signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
+                self._client = GenerativeAiInferenceClient(
+                    config=config,
+                    signer=signer,
+                    service_endpoint=self.service_endpoint,
+                    retry_strategy=oci.retry.NoneRetryStrategy(),
+                    timeout=(10, 240),
+                )
+            elif self.auth_type == 'RESOURCE_PRINCIPAL':
+                config = {}
+                signer = oci.auth.signers.get_resource_principals_signer()
+                self._client = GenerativeAiInferenceClient(
+                    config=config,
+                    signer=signer,
+                    service_endpoint=self.service_endpoint,
+                    retry_strategy=oci.retry.NoneRetryStrategy(),
+                    timeout=(10, 240),
+                )
+            else:
+                # Fallback to API_KEY
+                config = oci.config.from_file('~/.oci/config', self.auth_profile)
+                self._client = GenerativeAiInferenceClient(
+                    config=config,
+                    service_endpoint=self.service_endpoint,
+                    retry_strategy=oci.retry.NoneRetryStrategy(),
+                    timeout=(10, 240),
+                )
+
+        return self._client
+
+    def _extract_usage(self, response) -> ChatInvokeUsage | None:
+        """Extract usage information from OCI response."""
+        try:
+            # The response is the direct OCI response object, not a dict
+            if hasattr(response, 'data') and hasattr(response.data, 'chat_response'):
+                chat_response = response.data.chat_response
+                if hasattr(chat_response, 'usage'):
+                    usage = chat_response.usage
+                    return ChatInvokeUsage(
+                        prompt_tokens=getattr(usage, 'prompt_tokens', 0),
+                        prompt_cached_tokens=None,
+                        prompt_cache_creation_tokens=None,
+                        prompt_image_tokens=None,
+                        completion_tokens=getattr(usage, 'completion_tokens', 0),
+                        total_tokens=getattr(usage, 'total_tokens', 0),
+                    )
+            return None
+        except Exception:
+            return None
+
+    def _extract_content(self, response) -> str:
+        """Extract text content from OCI response."""
+        try:
+            # The response is the direct OCI response object, not a dict
+            if not hasattr(response, 'data'):
+                raise ModelProviderError(message='Invalid response format: no data attribute', status_code=500, model=self.name)
+
+            chat_response = response.data.chat_response
+
+            # Handle different response types based on provider
+            if hasattr(chat_response, 'text'):
+                # Cohere response format - has direct text attribute
+                return chat_response.text or ''
+            elif hasattr(chat_response, 'choices') and chat_response.choices:
+                # Generic response format - has choices array (Meta, xAI)
+                choice = chat_response.choices[0]
+                message = choice.message
+                content_parts = message.content
+
+                # Extract text from content parts
+                text_parts = []
+                for part in content_parts:
+                    if hasattr(part, 'text'):
+                        text_parts.append(part.text)
+
+                return '\n'.join(text_parts) if text_parts else ''
+            else:
+                raise ModelProviderError(
+                    message=f'Unsupported response format: {type(chat_response).__name__}', status_code=500, model=self.name
+                )
+
+        except Exception as e:
+            raise ModelProviderError(
+                message=f'Failed to extract content from response: {str(e)}', status_code=500, model=self.name
+            ) from e
+
+    async def _make_request(self, messages: list[BaseMessage]):
+        """Make async request to OCI API using proper OCI SDK models."""
+
+        # Create chat request based on provider type
+        if self._uses_cohere_format():
+            # Cohere models use CohereChatRequest with single message string
+            message_text = OCIRawMessageSerializer.serialize_messages_for_cohere(messages)
+
+            chat_request = CohereChatRequest()
+            chat_request.message = message_text
+            chat_request.max_tokens = self.max_tokens
+            chat_request.temperature = self.temperature
+            chat_request.frequency_penalty = self.frequency_penalty
+            chat_request.top_p = self.top_p
+            chat_request.top_k = self.top_k
+        else:
+            # Meta, xAI and other models use GenericChatRequest with messages array
+            oci_messages = OCIRawMessageSerializer.serialize_messages(messages)
+
+            chat_request = GenericChatRequest()
+            chat_request.api_format = BaseChatRequest.API_FORMAT_GENERIC
+            chat_request.messages = oci_messages
+            chat_request.max_tokens = self.max_tokens
+            chat_request.temperature = self.temperature
+            chat_request.top_p = self.top_p
+
+            # Provider-specific parameters
+            if self.provider.lower() == 'meta':
+                # Meta models support frequency_penalty and presence_penalty
+                chat_request.frequency_penalty = self.frequency_penalty
+                chat_request.presence_penalty = self.presence_penalty
+            elif self.provider.lower() == 'xai':
+                # xAI models support top_k but not frequency_penalty or presence_penalty
+                chat_request.top_k = self.top_k
+            else:
+                # Default: include all parameters for unknown providers
+                chat_request.frequency_penalty = self.frequency_penalty
+                chat_request.presence_penalty = self.presence_penalty
+
+        # Create serving mode
+        serving_mode = OnDemandServingMode(model_id=self.model_id)
+
+        # Create chat details
+        chat_details = ChatDetails()
+        chat_details.serving_mode = serving_mode
+        chat_details.chat_request = chat_request
+        chat_details.compartment_id = self.compartment_id
+
+        # Make the request in a thread to avoid blocking
+        def _sync_request():
+            try:
+                client = self._get_oci_client()
+                response = client.chat(chat_details)
+                return response  # Return the raw response object
+            except Exception as e:
+                # Handle OCI-specific exceptions
+                status_code = getattr(e, 'status', 500)
+                if status_code == 429:
+                    raise ModelRateLimitError(message=f'Rate limit exceeded: {str(e)}', model=self.name) from e
+                else:
+                    raise ModelProviderError(message=str(e), status_code=status_code, model=self.name) from e
+
+        # Run in thread pool to make it async
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, _sync_request)
+
+    @overload
+    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...
+
+    @overload
+    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...
+
+    async def ainvoke(
+        self, messages: list[BaseMessage], output_format: type[T] | None = None
+    ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
+        """
+        Invoke the OCI GenAI model with the given messages using raw API.
+
+        Args:
+            messages: List of chat messages
+            output_format: Optional Pydantic model class for structured output
+
+        Returns:
+            Either a string response or an instance of output_format
+        """
+        try:
+            if output_format is None:
+                # Return string response
+                response = await self._make_request(messages)
+                content = self._extract_content(response)
+                usage = self._extract_usage(response)
+
+                return ChatInvokeCompletion(
+                    completion=content,
+                    usage=usage,
+                )
+            else:
+                # For structured output, add JSON schema instructions
+                optimized_schema = SchemaOptimizer.create_optimized_json_schema(output_format)
+
+                # Add JSON schema instruction to messages
+                system_instruction = f"""
+You must respond with ONLY a valid JSON object that matches this exact schema:
+{json.dumps(optimized_schema, indent=2)}
+
+IMPORTANT:
+- Your response must be ONLY the JSON object, no additional text
+- The JSON must be valid and parseable
+- All required fields must be present
+- No extra fields are allowed
+- Use proper JSON syntax with double quotes
+"""
+
+                # Clone messages and add system instruction
+                modified_messages = messages.copy()
+
+                # Add or modify system message
+                from browser_use.llm.messages import SystemMessage
+
+                if modified_messages and hasattr(modified_messages[0], 'role') and modified_messages[0].role == 'system':
+                    # Modify existing system message
+                    existing_content = modified_messages[0].content
+                    if isinstance(existing_content, str):
+                        modified_messages[0].content = existing_content + '\n\n' + system_instruction
+                    else:
+                        # Handle list content
+                        modified_messages[0].content = str(existing_content) + '\n\n' + system_instruction
+                else:
+                    # Insert new system message at the beginning
+                    modified_messages.insert(0, SystemMessage(content=system_instruction))
+
+                response = await self._make_request(modified_messages)
+                response_text = self._extract_content(response)
+
+                # Clean and parse the JSON response
+                try:
+                    # Clean the response text
+                    cleaned_text = response_text.strip()
+
+                    # Remove markdown code blocks if present
+                    if cleaned_text.startswith('```json'):
+                        cleaned_text = cleaned_text[7:]
+                    if cleaned_text.startswith('```'):
+                        cleaned_text = cleaned_text[3:]
+                    if cleaned_text.endswith('```'):
+                        cleaned_text = cleaned_text[:-3]
+
+                    cleaned_text = cleaned_text.strip()
+
+                    # Try to find JSON object in the response
+                    if not cleaned_text.startswith('{'):
+                        start_idx = cleaned_text.find('{')
+                        end_idx = cleaned_text.rfind('}')
+                        if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
+                            cleaned_text = cleaned_text[start_idx : end_idx + 1]
+
+                    # Parse the JSON
+                    parsed_data = json.loads(cleaned_text)
+                    parsed = output_format.model_validate(parsed_data)
+
+                    usage = self._extract_usage(response)
+                    return ChatInvokeCompletion(
+                        completion=parsed,
+                        usage=usage,
+                    )
+
+                except (json.JSONDecodeError, ValueError) as e:
+                    raise ModelProviderError(
+                        message=f'Failed to parse structured output: {str(e)}. Response was: {response_text[:200]}...',
+                        status_code=500,
+                        model=self.name,
+                    ) from e
+
+        except ModelRateLimitError:
+            # Re-raise rate limit errors as-is
+            raise
+        except ModelProviderError:
+            # Re-raise provider errors as-is
+            raise
+        except Exception as e:
+            # Handle any other exceptions
+            raise ModelProviderError(
+                message=f'Unexpected error: {str(e)}',
+                status_code=500,
+                model=self.name,
+            ) from e
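The class above exposes one entry point, ainvoke, which returns plain text by default or a Pydantic-validated object when output_format is given. The following is a minimal usage sketch, not part of the package diff: it assumes ChatOCIRaw is re-exported from browser_use.llm.oci_raw (its __init__.py is listed above but not shown), and the OCIDs, the endpoint region, and the CityAnswer model are placeholders.

import asyncio

from pydantic import BaseModel

from browser_use.llm.messages import UserMessage
from browser_use.llm.oci_raw import ChatOCIRaw  # assumed re-export; otherwise import from .chat


class CityAnswer(BaseModel):  # hypothetical schema for the structured-output path
    city: str
    country: str


async def main() -> None:
    llm = ChatOCIRaw(
        model_id='ocid1.generativeaimodel.oc1..example',  # placeholder model OCID
        service_endpoint='https://inference.generativeai.us-chicago-1.oci.oraclecloud.com',  # example region
        compartment_id='ocid1.compartment.oc1..example',  # placeholder compartment OCID
        provider='meta',
        temperature=0.2,
        max_tokens=256,
    )
    messages = [UserMessage(content='What is the capital of France?')]

    # Plain-text path: content and token usage are extracted from the raw OCI response
    text = await llm.ainvoke(messages)
    print(text.completion, text.usage)

    # Structured path: a JSON-schema instruction is injected as a system message
    # and the reply is parsed with CityAnswer.model_validate
    parsed = await llm.ainvoke(messages, output_format=CityAnswer)
    print(parsed.completion.city)


asyncio.run(main())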
browser_use/llm/oci_raw/serializer.py
@@ -0,0 +1,229 @@
+"""
+Message serializer for OCI Raw API integration.
+
+This module handles the conversion between browser-use message formats
+and the OCI Raw API message format using proper OCI SDK models.
+"""
+
+from oci.generative_ai_inference.models import ImageContent, ImageUrl, Message, TextContent
+
+from browser_use.llm.messages import (
+    AssistantMessage,
+    BaseMessage,
+    ContentPartImageParam,
+    SystemMessage,
+    UserMessage,
+)
+
+
+class OCIRawMessageSerializer:
+    """
+    Serializer for converting between browser-use message types and OCI Raw API message formats.
+    Uses proper OCI SDK model objects as shown in the working example.
+
+    Supports both:
+    - GenericChatRequest (Meta, xAI models) - uses messages array
+    - CohereChatRequest (Cohere models) - uses single message string
+    """
+
+    @staticmethod
+    def _is_base64_image(url: str) -> bool:
+        """Check if the URL is a base64 encoded image."""
+        return url.startswith('data:image/')
+
+    @staticmethod
+    def _parse_base64_url(url: str) -> str:
+        """Parse base64 URL and return the base64 data."""
+        if not OCIRawMessageSerializer._is_base64_image(url):
+            raise ValueError(f'Not a base64 image URL: {url}')
+
+        # Extract the base64 data from data:image/png;base64,<data>
+        try:
+            header, data = url.split(',', 1)
+            return data
+        except ValueError:
+            raise ValueError(f'Invalid base64 image URL format: {url}')
+
+    @staticmethod
+    def _create_image_content(part: ContentPartImageParam) -> ImageContent:
+        """Convert ContentPartImageParam to OCI ImageContent."""
+        url = part.image_url.url
+
+        if OCIRawMessageSerializer._is_base64_image(url):
+            # Handle base64 encoded images - OCI expects data URLs as-is
+            image_url = ImageUrl(url=url)
+        else:
+            # Handle regular URLs
+            image_url = ImageUrl(url=url)
+
+        return ImageContent(image_url=image_url)
+
+    @staticmethod
+    def serialize_messages(messages: list[BaseMessage]) -> list[Message]:
+        """
+        Serialize a list of browser-use messages to OCI Raw API Message objects.
+
+        Args:
+            messages: List of browser-use messages
+
+        Returns:
+            List of OCI Message objects
+        """
+        oci_messages = []
+
+        for message in messages:
+            oci_message = Message()
+
+            if isinstance(message, UserMessage):
+                oci_message.role = 'USER'
+                content = message.content
+                if isinstance(content, str):
+                    text_content = TextContent()
+                    text_content.text = content
+                    oci_message.content = [text_content]
+                elif isinstance(content, list):
+                    # Handle content parts - text and images
+                    contents = []
+                    for part in content:
+                        if part.type == 'text':
+                            text_content = TextContent()
+                            text_content.text = part.text
+                            contents.append(text_content)
+                        elif part.type == 'image_url':
+                            image_content = OCIRawMessageSerializer._create_image_content(part)
+                            contents.append(image_content)
+                    if contents:
+                        oci_message.content = contents
+
+            elif isinstance(message, SystemMessage):
+                oci_message.role = 'SYSTEM'
+                content = message.content
+                if isinstance(content, str):
+                    text_content = TextContent()
+                    text_content.text = content
+                    oci_message.content = [text_content]
+                elif isinstance(content, list):
+                    # Handle content parts - typically just text for system messages
+                    contents = []
+                    for part in content:
+                        if part.type == 'text':
+                            text_content = TextContent()
+                            text_content.text = part.text
+                            contents.append(text_content)
+                        elif part.type == 'image_url':
+                            # System messages can theoretically have images too
+                            image_content = OCIRawMessageSerializer._create_image_content(part)
+                            contents.append(image_content)
+                    if contents:
+                        oci_message.content = contents
+
+            elif isinstance(message, AssistantMessage):
+                oci_message.role = 'ASSISTANT'
+                content = message.content
+                if isinstance(content, str):
+                    text_content = TextContent()
+                    text_content.text = content
+                    oci_message.content = [text_content]
+                elif isinstance(content, list):
+                    # Handle content parts - text, images, and refusals
+                    contents = []
+                    for part in content:
+                        if part.type == 'text':
+                            text_content = TextContent()
+                            text_content.text = part.text
+                            contents.append(text_content)
+                        elif part.type == 'image_url':
+                            # Assistant messages can have images in responses
+                            # Note: This is currently unreachable in browser-use but kept for completeness
+                            image_content = OCIRawMessageSerializer._create_image_content(part)
+                            contents.append(image_content)
+                        elif part.type == 'refusal':
+                            text_content = TextContent()
+                            text_content.text = f'[Refusal] {part.refusal}'
+                            contents.append(text_content)
+                    if contents:
+                        oci_message.content = contents
+            else:
+                # Fallback for any message format issues
+                oci_message.role = 'USER'
+                text_content = TextContent()
+                text_content.text = str(message)
+                oci_message.content = [text_content]
+
+            # Only append messages that have content
+            if hasattr(oci_message, 'content') and oci_message.content:
+                oci_messages.append(oci_message)
+
+        return oci_messages
+
+    @staticmethod
+    def serialize_messages_for_cohere(messages: list[BaseMessage]) -> str:
+        """
+        Serialize messages for Cohere models which expect a single message string.
+
+        Cohere models use CohereChatRequest.message (string) instead of messages array.
+        We combine all messages into a single conversation string.
+
+        Args:
+            messages: List of browser-use messages
+
+        Returns:
+            Single string containing the conversation
+        """
+        conversation_parts = []
+
+        for message in messages:
+            content = ''
+
+            if isinstance(message, UserMessage):
+                if isinstance(message.content, str):
+                    content = message.content
+                elif isinstance(message.content, list):
+                    # Extract text from content parts
+                    text_parts = []
+                    for part in message.content:
+                        if part.type == 'text':
+                            text_parts.append(part.text)
+                        elif part.type == 'image_url':
+                            # Cohere may not support images in all models, use a short placeholder
+                            # to avoid massive token usage from base64 data URIs
+                            if part.image_url.url.startswith('data:image/'):
+                                text_parts.append('[Image: base64_data]')
+                            else:
+                                text_parts.append('[Image: external_url]')
+                    content = ' '.join(text_parts)
+
+                conversation_parts.append(f'User: {content}')
+
+            elif isinstance(message, SystemMessage):
+                if isinstance(message.content, str):
+                    content = message.content
+                elif isinstance(message.content, list):
+                    # Extract text from content parts
+                    text_parts = []
+                    for part in message.content:
+                        if part.type == 'text':
+                            text_parts.append(part.text)
+                    content = ' '.join(text_parts)
+
+                conversation_parts.append(f'System: {content}')
+
+            elif isinstance(message, AssistantMessage):
+                if isinstance(message.content, str):
+                    content = message.content
+                elif isinstance(message.content, list):
+                    # Extract text from content parts
+                    text_parts = []
+                    for part in message.content:
+                        if part.type == 'text':
+                            text_parts.append(part.text)
+                        elif part.type == 'refusal':
+                            text_parts.append(f'[Refusal] {part.refusal}')
+                    content = ' '.join(text_parts)
+
+                conversation_parts.append(f'Assistant: {content}')
+            else:
+                # Fallback
+                conversation_parts.append(f'User: {str(message)}')
+
+        return '\n\n'.join(conversation_parts)
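Because CohereChatRequest carries a single message string rather than a messages array, serialize_messages_for_cohere flattens the history into role-prefixed paragraphs joined by blank lines. A small illustration of that output, using the message classes imported in the diff above (the conversation text itself is made up):

from browser_use.llm.messages import AssistantMessage, SystemMessage, UserMessage
from browser_use.llm.oci_raw.serializer import OCIRawMessageSerializer

history = [
    SystemMessage(content='You are a helpful assistant.'),
    UserMessage(content='Hello'),
    AssistantMessage(content='Hi, how can I help?'),
]

flat = OCIRawMessageSerializer.serialize_messages_for_cohere(history)
# flat contains the three parts joined with '\n\n':
#   System: You are a helpful assistant.
#
#   User: Hello
#
#   Assistant: Hi, how can I help?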