optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,443 @@
1
+ """
2
+ OCI Raw API chat model integration for browser-use.
3
+
4
+ This module provides direct integration with Oracle Cloud Infrastructure's
5
+ Generative AI service using raw API calls without Langchain dependencies.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ from dataclasses import dataclass
11
+ from typing import TypeVar, overload
12
+
13
+ import oci
14
+ from oci.generative_ai_inference import GenerativeAiInferenceClient
15
+ from oci.generative_ai_inference.models import (
16
+ BaseChatRequest,
17
+ ChatDetails,
18
+ CohereChatRequest,
19
+ GenericChatRequest,
20
+ OnDemandServingMode,
21
+ )
22
+ from pydantic import BaseModel
23
+
24
+ from browser_use.llm.base import BaseChatModel
25
+ from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
26
+ from browser_use.llm.messages import BaseMessage
27
+ from browser_use.llm.schema import SchemaOptimizer
28
+ from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
29
+
30
+ from .serializer import OCIRawMessageSerializer
31
+
32
+ T = TypeVar('T', bound=BaseModel)
33
+
34
+
35
@dataclass
class ChatOCIRaw(BaseChatModel):
	"""
	A direct OCI Raw API integration for browser-use that bypasses Langchain.

	This class provides a browser-use compatible interface for OCI GenAI models
	using direct API calls to Oracle Cloud Infrastructure.

	Args:
		model_id: The OCI GenAI model OCID
		service_endpoint: The OCI service endpoint URL
		compartment_id: The OCI compartment OCID
		provider: The model provider (e.g., "meta", "cohere", "xai")
		temperature: Temperature for response generation (0.0-2.0) - supported by all providers
		max_tokens: Maximum tokens in response - supported by all providers
		frequency_penalty: Frequency penalty for response generation - supported by Meta and Cohere only
		presence_penalty: Presence penalty for response generation - supported by Meta only
		top_p: Top-p sampling parameter - supported by all providers
		top_k: Top-k sampling parameter - supported by Cohere and xAI only
		auth_type: Authentication type ("API_KEY", "INSTANCE_PRINCIPAL" or "RESOURCE_PRINCIPAL")
		auth_profile: Authentication profile name (used for API_KEY auth)
		timeout: Request timeout in seconds. NOTE(review): currently not forwarded to the
			OCI client, which uses a fixed (10, 240) connect/read timeout - kept for
			interface compatibility; confirm before wiring it through.
	"""

	# Model configuration
	model_id: str
	service_endpoint: str
	compartment_id: str
	provider: str = 'meta'

	# Model parameters
	temperature: float | None = 1.0
	max_tokens: int | None = 600
	frequency_penalty: float | None = 0.0
	presence_penalty: float | None = 0.0
	top_p: float | None = 0.75
	top_k: int | None = 0  # Used by Cohere models

	# Authentication
	auth_type: str = 'API_KEY'
	auth_profile: str = 'DEFAULT'

	# Client configuration
	timeout: float = 60.0

	def _short_name(self) -> str:
		"""Telemetry-friendly model name, kept under ~100 chars.

		Full model OCIDs exceed the telemetry limit, so for long ids a compact
		name is derived from the OCID's 4th dot-segment instead.
		"""
		if len(self.model_id) > 90:
			parts = self.model_id.split('.')
			if len(parts) >= 4:
				return f'oci-{self.provider}-{parts[3]}'  # e.g., "oci-meta-us-chicago-1"
			return f'oci-{self.provider}-model'
		return self.model_id

	@property
	def provider_name(self) -> str:
		return 'oci-raw'

	@property
	def name(self) -> str:
		# Return a shorter name for telemetry (max 100 chars)
		return self._short_name()

	@property
	def model(self) -> str:
		return self.model_id

	@property
	def model_name(self) -> str:
		# Override for telemetry - return shorter name (max 100 chars)
		return self._short_name()

	def _uses_cohere_format(self) -> bool:
		"""Check if the provider uses Cohere chat request format."""
		return self.provider.lower() == 'cohere'

	def _get_supported_parameters(self) -> dict[str, bool]:
		"""Get which parameters are supported by the current provider.

		Unknown providers default to "everything supported".
		"""
		# (frequency_penalty, presence_penalty, top_k) per provider
		freq, pres, topk = {
			'meta': (True, True, False),
			'cohere': (True, False, True),
			'xai': (False, False, True),
		}.get(self.provider.lower(), (True, True, True))
		return {
			'temperature': True,
			'max_tokens': True,
			'frequency_penalty': freq,
			'presence_penalty': pres,
			'top_p': True,
			'top_k': topk,
		}

	def _get_oci_client(self) -> GenerativeAiInferenceClient:
		"""Lazily build and cache the OCI GenerativeAiInferenceClient.

		Auth is selected by ``auth_type``; API_KEY (and any unknown value, as a
		fallback) reads ``~/.oci/config``, the principal types use OCI signers.
		"""
		if not hasattr(self, '_client'):
			if self.auth_type == 'INSTANCE_PRINCIPAL':
				config = {}
				signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
			elif self.auth_type == 'RESOURCE_PRINCIPAL':
				config = {}
				signer = oci.auth.signers.get_resource_principals_signer()
			else:
				# API_KEY, plus fallback for unrecognized auth types
				config = oci.config.from_file('~/.oci/config', self.auth_profile)
				signer = None

			client_kwargs = {
				'config': config,
				'service_endpoint': self.service_endpoint,
				'retry_strategy': oci.retry.NoneRetryStrategy(),
				# (connect, read) timeouts in seconds; intentionally generous read timeout
				'timeout': (10, 240),
			}
			if signer is not None:
				client_kwargs['signer'] = signer
			self._client = GenerativeAiInferenceClient(**client_kwargs)

		return self._client

	def _extract_usage(self, response) -> ChatInvokeUsage | None:
		"""Extract token usage from an OCI response; best-effort, returns None on any problem."""
		try:
			# The response is the direct OCI response object, not a dict
			if hasattr(response, 'data') and hasattr(response.data, 'chat_response'):
				chat_response = response.data.chat_response
				if hasattr(chat_response, 'usage'):
					usage = chat_response.usage
					return ChatInvokeUsage(
						prompt_tokens=getattr(usage, 'prompt_tokens', 0),
						prompt_cached_tokens=None,
						prompt_cache_creation_tokens=None,
						prompt_image_tokens=None,
						completion_tokens=getattr(usage, 'completion_tokens', 0),
						total_tokens=getattr(usage, 'total_tokens', 0),
					)
			return None
		except Exception:
			# Usage is telemetry only; never fail the call over it.
			return None

	def _extract_content(self, response) -> str:
		"""Extract the text content from an OCI chat response.

		Handles both the Cohere shape (direct ``text`` attribute) and the generic
		shape used by Meta/xAI (``choices[0].message.content`` parts).

		Raises:
			ModelProviderError: if the response shape is unrecognized or extraction fails.
		"""
		try:
			# The response is the direct OCI response object, not a dict
			if not hasattr(response, 'data'):
				raise ModelProviderError(message='Invalid response format: no data attribute', status_code=500, model=self.name)

			chat_response = response.data.chat_response

			if hasattr(chat_response, 'text'):
				# Cohere response format - has direct text attribute
				return chat_response.text or ''

			if hasattr(chat_response, 'choices') and chat_response.choices:
				# Generic response format - has choices array (Meta, xAI)
				message = chat_response.choices[0].message
				text_parts = [part.text for part in message.content if hasattr(part, 'text')]
				return '\n'.join(text_parts) if text_parts else ''

			raise ModelProviderError(
				message=f'Unsupported response format: {type(chat_response).__name__}', status_code=500, model=self.name
			)
		except ModelProviderError:
			# Already a well-formed provider error raised above; don't double-wrap it.
			raise
		except Exception as e:
			raise ModelProviderError(
				message=f'Failed to extract content from response: {str(e)}', status_code=500, model=self.name
			) from e

	async def _make_request(self, messages: list[BaseMessage]):
		"""Build the provider-specific chat request and execute it off the event loop.

		Returns:
			The raw OCI SDK response object.

		Raises:
			ModelRateLimitError: on HTTP 429 from OCI.
			ModelProviderError: on any other OCI error.
		"""
		if self._uses_cohere_format():
			# Cohere models use CohereChatRequest with a single flattened message string
			chat_request = CohereChatRequest()
			chat_request.message = OCIRawMessageSerializer.serialize_messages_for_cohere(messages)
			chat_request.max_tokens = self.max_tokens
			chat_request.temperature = self.temperature
			chat_request.frequency_penalty = self.frequency_penalty
			chat_request.top_p = self.top_p
			chat_request.top_k = self.top_k
		else:
			# Meta, xAI and other models use GenericChatRequest with a messages array
			chat_request = GenericChatRequest()
			chat_request.api_format = BaseChatRequest.API_FORMAT_GENERIC
			chat_request.messages = OCIRawMessageSerializer.serialize_messages(messages)
			chat_request.max_tokens = self.max_tokens
			chat_request.temperature = self.temperature
			chat_request.top_p = self.top_p

			# Provider-specific parameters
			provider = self.provider.lower()
			if provider == 'meta':
				# Meta models support frequency_penalty and presence_penalty
				chat_request.frequency_penalty = self.frequency_penalty
				chat_request.presence_penalty = self.presence_penalty
			elif provider == 'xai':
				# xAI models support top_k but not frequency_penalty or presence_penalty
				chat_request.top_k = self.top_k
			else:
				# Default: include all parameters for unknown providers
				chat_request.frequency_penalty = self.frequency_penalty
				chat_request.presence_penalty = self.presence_penalty

		chat_details = ChatDetails()
		chat_details.serving_mode = OnDemandServingMode(model_id=self.model_id)
		chat_details.chat_request = chat_request
		chat_details.compartment_id = self.compartment_id

		def _sync_request():
			# The OCI SDK is synchronous; translate its failures into browser-use errors.
			try:
				client = self._get_oci_client()
				return client.chat(chat_details)  # Return the raw response object
			except Exception as e:
				status_code = getattr(e, 'status', 500)
				if status_code == 429:
					raise ModelRateLimitError(message=f'Rate limit exceeded: {str(e)}', model=self.name) from e
				raise ModelProviderError(message=str(e), status_code=status_code, model=self.name) from e

		# Run the blocking SDK call in a worker thread so the event loop stays free
		# (asyncio.get_event_loop() is deprecated inside coroutines; to_thread is the modern form).
		return await asyncio.to_thread(_sync_request)

	@staticmethod
	def _clean_json_text(response_text: str) -> str:
		"""Strip markdown code fences and surrounding prose from a JSON-ish response."""
		cleaned_text = response_text.strip()

		# Remove markdown code blocks if present
		if cleaned_text.startswith('```json'):
			cleaned_text = cleaned_text[7:]
		if cleaned_text.startswith('```'):
			cleaned_text = cleaned_text[3:]
		if cleaned_text.endswith('```'):
			cleaned_text = cleaned_text[:-3]
		cleaned_text = cleaned_text.strip()

		# Try to find a JSON object embedded in surrounding text
		if not cleaned_text.startswith('{'):
			start_idx = cleaned_text.find('{')
			end_idx = cleaned_text.rfind('}')
			if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
				cleaned_text = cleaned_text[start_idx : end_idx + 1]

		return cleaned_text

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

	@overload
	async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

	async def ainvoke(
		self, messages: list[BaseMessage], output_format: type[T] | None = None
	) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
		"""
		Invoke the OCI GenAI model with the given messages using raw API.

		Args:
			messages: List of chat messages
			output_format: Optional Pydantic model class for structured output

		Returns:
			Either a string response or an instance of output_format

		Raises:
			ModelRateLimitError: when OCI reports a 429.
			ModelProviderError: for any other failure, including unparseable structured output.
		"""
		try:
			if output_format is None:
				# Plain string completion
				response = await self._make_request(messages)
				return ChatInvokeCompletion(
					completion=self._extract_content(response),
					usage=self._extract_usage(response),
				)

			# For structured output, add JSON schema instructions
			optimized_schema = SchemaOptimizer.create_optimized_json_schema(output_format)

			system_instruction = f"""
You must respond with ONLY a valid JSON object that matches this exact schema:
{json.dumps(optimized_schema, indent=2)}

IMPORTANT:
- Your response must be ONLY the JSON object, no additional text
- The JSON must be valid and parseable
- All required fields must be present
- No extra fields are allowed
- Use proper JSON syntax with double quotes
"""

			from browser_use.llm.messages import SystemMessage

			# Build a new message list; list.copy() is shallow, so we must never mutate
			# the caller's message objects - replace the system message with a fresh one.
			modified_messages = messages.copy()
			if modified_messages and getattr(modified_messages[0], 'role', None) == 'system':
				existing_content = modified_messages[0].content
				if not isinstance(existing_content, str):
					# Handle list content by flattening to its string form
					existing_content = str(existing_content)
				modified_messages[0] = SystemMessage(content=existing_content + '\n\n' + system_instruction)
			else:
				# Insert new system message at the beginning
				modified_messages.insert(0, SystemMessage(content=system_instruction))

			response = await self._make_request(modified_messages)
			response_text = self._extract_content(response)

			# Clean and parse the JSON response
			try:
				parsed_data = json.loads(self._clean_json_text(response_text))
				# pydantic ValidationError subclasses ValueError, so it is caught below
				parsed = output_format.model_validate(parsed_data)
				return ChatInvokeCompletion(
					completion=parsed,
					usage=self._extract_usage(response),
				)
			except (json.JSONDecodeError, ValueError) as e:
				raise ModelProviderError(
					message=f'Failed to parse structured output: {str(e)}. Response was: {response_text[:200]}...',
					status_code=500,
					model=self.name,
				) from e

		except (ModelRateLimitError, ModelProviderError):
			# Re-raise known error types as-is
			raise
		except Exception as e:
			# Handle any other exceptions
			raise ModelProviderError(
				message=f'Unexpected error: {str(e)}',
				status_code=500,
				model=self.name,
			) from e
@@ -0,0 +1,229 @@
1
+ """
2
+ Message serializer for OCI Raw API integration.
3
+
4
+ This module handles the conversion between browser-use message formats
5
+ and the OCI Raw API message format using proper OCI SDK models.
6
+ """
7
+
8
+ from oci.generative_ai_inference.models import ImageContent, ImageUrl, Message, TextContent
9
+
10
+ from browser_use.llm.messages import (
11
+ AssistantMessage,
12
+ BaseMessage,
13
+ ContentPartImageParam,
14
+ SystemMessage,
15
+ UserMessage,
16
+ )
17
+
18
+
19
class OCIRawMessageSerializer:
	"""
	Serializer for converting between browser-use message types and OCI Raw API message formats.
	Uses proper OCI SDK model objects.

	Supports both:
	- GenericChatRequest (Meta, xAI models) - uses messages array
	- CohereChatRequest (Cohere models) - uses single message string
	"""

	@staticmethod
	def _is_base64_image(url: str) -> bool:
		"""Check if the URL is a base64 encoded data-URI image."""
		return url.startswith('data:image/')

	@staticmethod
	def _parse_base64_url(url: str) -> str:
		"""Return the base64 payload of a ``data:image/...;base64,<data>`` URL.

		Raises:
			ValueError: if the URL is not a data-URI image or is malformed.
		"""
		if not OCIRawMessageSerializer._is_base64_image(url):
			raise ValueError(f'Not a base64 image URL: {url}')

		try:
			_header, data = url.split(',', 1)
			return data
		except ValueError as e:
			# Chain the cause so the original split failure stays visible
			raise ValueError(f'Invalid base64 image URL format: {url}') from e

	@staticmethod
	def _create_image_content(part: ContentPartImageParam) -> ImageContent:
		"""Convert ContentPartImageParam to OCI ImageContent.

		OCI accepts both regular URLs and base64 data URLs as-is, so no
		transformation of the URL is needed.
		"""
		return ImageContent(image_url=ImageUrl(url=part.image_url.url))

	@staticmethod
	def _parts_to_oci(content) -> list:
		"""Convert a message's content (str or list of parts) into OCI content objects.

		Text and image parts map to TextContent/ImageContent; assistant refusal
		parts become a ``[Refusal] ...`` text part. Unknown part types are skipped.
		Returns [] for content that is neither a str nor a list.
		"""
		if isinstance(content, str):
			text_content = TextContent()
			text_content.text = content
			return [text_content]

		if not isinstance(content, list):
			return []

		oci_parts = []
		for part in content:
			if part.type == 'text':
				text_content = TextContent()
				text_content.text = part.text
				oci_parts.append(text_content)
			elif part.type == 'image_url':
				oci_parts.append(OCIRawMessageSerializer._create_image_content(part))
			elif part.type == 'refusal':
				text_content = TextContent()
				text_content.text = f'[Refusal] {part.refusal}'
				oci_parts.append(text_content)
		return oci_parts

	@staticmethod
	def serialize_messages(messages: list[BaseMessage]) -> list[Message]:
		"""
		Serialize a list of browser-use messages to OCI Raw API Message objects.

		Messages whose content serializes to nothing are dropped; unrecognized
		message types fall back to a USER message containing ``str(message)``.

		Args:
			messages: List of browser-use messages

		Returns:
			List of OCI Message objects
		"""
		role_by_type = (
			(UserMessage, 'USER'),
			(SystemMessage, 'SYSTEM'),
			(AssistantMessage, 'ASSISTANT'),
		)

		oci_messages = []
		for message in messages:
			oci_message = Message()

			for msg_type, role in role_by_type:
				if isinstance(message, msg_type):
					oci_message.role = role
					contents = OCIRawMessageSerializer._parts_to_oci(message.content)
					break
			else:
				# Fallback for any message format issues
				oci_message.role = 'USER'
				text_content = TextContent()
				text_content.text = str(message)
				contents = [text_content]

			# Only emit messages that actually carry content
			if contents:
				oci_message.content = contents
				oci_messages.append(oci_message)

		return oci_messages

	@staticmethod
	def _flatten_text(content, *, image_placeholders: bool = False, include_refusals: bool = False) -> str:
		"""Flatten a message's content (str or list of parts) to a single string.

		Args:
			content: The message content.
			image_placeholders: When True, image parts become short placeholders
				(avoids blowing up token usage with base64 data URIs).
			include_refusals: When True, refusal parts become ``[Refusal] ...`` text.
		"""
		if isinstance(content, str):
			return content
		if not isinstance(content, list):
			return ''

		text_parts = []
		for part in content:
			if part.type == 'text':
				text_parts.append(part.text)
			elif part.type == 'image_url' and image_placeholders:
				if part.image_url.url.startswith('data:image/'):
					text_parts.append('[Image: base64_data]')
				else:
					text_parts.append('[Image: external_url]')
			elif part.type == 'refusal' and include_refusals:
				text_parts.append(f'[Refusal] {part.refusal}')
		return ' '.join(text_parts)

	@staticmethod
	def serialize_messages_for_cohere(messages: list[BaseMessage]) -> str:
		"""
		Serialize messages for Cohere models which expect a single message string.

		Cohere models use CohereChatRequest.message (string) instead of messages array.
		We combine all messages into a single conversation string.

		Args:
			messages: List of browser-use messages

		Returns:
			Single string containing the conversation
		"""
		conversation_parts = []

		for message in messages:
			if isinstance(message, UserMessage):
				# User content may contain images - replace them with short placeholders
				text = OCIRawMessageSerializer._flatten_text(message.content, image_placeholders=True)
				conversation_parts.append(f'User: {text}')
			elif isinstance(message, SystemMessage):
				# System messages: text parts only
				text = OCIRawMessageSerializer._flatten_text(message.content)
				conversation_parts.append(f'System: {text}')
			elif isinstance(message, AssistantMessage):
				# Assistant messages: text plus any refusal markers
				text = OCIRawMessageSerializer._flatten_text(message.content, include_refusals=True)
				conversation_parts.append(f'Assistant: {text}')
			else:
				# Fallback
				conversation_parts.append(f'User: {str(message)}')

		return '\n\n'.join(conversation_parts)