optexity-browser-use 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. browser_use/__init__.py +157 -0
  2. browser_use/actor/__init__.py +11 -0
  3. browser_use/actor/element.py +1175 -0
  4. browser_use/actor/mouse.py +134 -0
  5. browser_use/actor/page.py +561 -0
  6. browser_use/actor/playground/flights.py +41 -0
  7. browser_use/actor/playground/mixed_automation.py +54 -0
  8. browser_use/actor/playground/playground.py +236 -0
  9. browser_use/actor/utils.py +176 -0
  10. browser_use/agent/cloud_events.py +282 -0
  11. browser_use/agent/gif.py +424 -0
  12. browser_use/agent/judge.py +170 -0
  13. browser_use/agent/message_manager/service.py +473 -0
  14. browser_use/agent/message_manager/utils.py +52 -0
  15. browser_use/agent/message_manager/views.py +98 -0
  16. browser_use/agent/prompts.py +413 -0
  17. browser_use/agent/service.py +2316 -0
  18. browser_use/agent/system_prompt.md +185 -0
  19. browser_use/agent/system_prompt_flash.md +10 -0
  20. browser_use/agent/system_prompt_no_thinking.md +183 -0
  21. browser_use/agent/views.py +743 -0
  22. browser_use/browser/__init__.py +41 -0
  23. browser_use/browser/cloud/cloud.py +203 -0
  24. browser_use/browser/cloud/views.py +89 -0
  25. browser_use/browser/events.py +578 -0
  26. browser_use/browser/profile.py +1158 -0
  27. browser_use/browser/python_highlights.py +548 -0
  28. browser_use/browser/session.py +3225 -0
  29. browser_use/browser/session_manager.py +399 -0
  30. browser_use/browser/video_recorder.py +162 -0
  31. browser_use/browser/views.py +200 -0
  32. browser_use/browser/watchdog_base.py +260 -0
  33. browser_use/browser/watchdogs/__init__.py +0 -0
  34. browser_use/browser/watchdogs/aboutblank_watchdog.py +253 -0
  35. browser_use/browser/watchdogs/crash_watchdog.py +335 -0
  36. browser_use/browser/watchdogs/default_action_watchdog.py +2729 -0
  37. browser_use/browser/watchdogs/dom_watchdog.py +817 -0
  38. browser_use/browser/watchdogs/downloads_watchdog.py +1277 -0
  39. browser_use/browser/watchdogs/local_browser_watchdog.py +461 -0
  40. browser_use/browser/watchdogs/permissions_watchdog.py +43 -0
  41. browser_use/browser/watchdogs/popups_watchdog.py +143 -0
  42. browser_use/browser/watchdogs/recording_watchdog.py +126 -0
  43. browser_use/browser/watchdogs/screenshot_watchdog.py +62 -0
  44. browser_use/browser/watchdogs/security_watchdog.py +280 -0
  45. browser_use/browser/watchdogs/storage_state_watchdog.py +335 -0
  46. browser_use/cli.py +2359 -0
  47. browser_use/code_use/__init__.py +16 -0
  48. browser_use/code_use/formatting.py +192 -0
  49. browser_use/code_use/namespace.py +665 -0
  50. browser_use/code_use/notebook_export.py +276 -0
  51. browser_use/code_use/service.py +1340 -0
  52. browser_use/code_use/system_prompt.md +574 -0
  53. browser_use/code_use/utils.py +150 -0
  54. browser_use/code_use/views.py +171 -0
  55. browser_use/config.py +505 -0
  56. browser_use/controller/__init__.py +3 -0
  57. browser_use/dom/enhanced_snapshot.py +161 -0
  58. browser_use/dom/markdown_extractor.py +169 -0
  59. browser_use/dom/playground/extraction.py +312 -0
  60. browser_use/dom/playground/multi_act.py +32 -0
  61. browser_use/dom/serializer/clickable_elements.py +200 -0
  62. browser_use/dom/serializer/code_use_serializer.py +287 -0
  63. browser_use/dom/serializer/eval_serializer.py +478 -0
  64. browser_use/dom/serializer/html_serializer.py +212 -0
  65. browser_use/dom/serializer/paint_order.py +197 -0
  66. browser_use/dom/serializer/serializer.py +1170 -0
  67. browser_use/dom/service.py +825 -0
  68. browser_use/dom/utils.py +129 -0
  69. browser_use/dom/views.py +906 -0
  70. browser_use/exceptions.py +5 -0
  71. browser_use/filesystem/__init__.py +0 -0
  72. browser_use/filesystem/file_system.py +619 -0
  73. browser_use/init_cmd.py +376 -0
  74. browser_use/integrations/gmail/__init__.py +24 -0
  75. browser_use/integrations/gmail/actions.py +115 -0
  76. browser_use/integrations/gmail/service.py +225 -0
  77. browser_use/llm/__init__.py +155 -0
  78. browser_use/llm/anthropic/chat.py +242 -0
  79. browser_use/llm/anthropic/serializer.py +312 -0
  80. browser_use/llm/aws/__init__.py +36 -0
  81. browser_use/llm/aws/chat_anthropic.py +242 -0
  82. browser_use/llm/aws/chat_bedrock.py +289 -0
  83. browser_use/llm/aws/serializer.py +257 -0
  84. browser_use/llm/azure/chat.py +91 -0
  85. browser_use/llm/base.py +57 -0
  86. browser_use/llm/browser_use/__init__.py +3 -0
  87. browser_use/llm/browser_use/chat.py +201 -0
  88. browser_use/llm/cerebras/chat.py +193 -0
  89. browser_use/llm/cerebras/serializer.py +109 -0
  90. browser_use/llm/deepseek/chat.py +212 -0
  91. browser_use/llm/deepseek/serializer.py +109 -0
  92. browser_use/llm/exceptions.py +29 -0
  93. browser_use/llm/google/__init__.py +3 -0
  94. browser_use/llm/google/chat.py +542 -0
  95. browser_use/llm/google/serializer.py +120 -0
  96. browser_use/llm/groq/chat.py +229 -0
  97. browser_use/llm/groq/parser.py +158 -0
  98. browser_use/llm/groq/serializer.py +159 -0
  99. browser_use/llm/messages.py +238 -0
  100. browser_use/llm/models.py +271 -0
  101. browser_use/llm/oci_raw/__init__.py +10 -0
  102. browser_use/llm/oci_raw/chat.py +443 -0
  103. browser_use/llm/oci_raw/serializer.py +229 -0
  104. browser_use/llm/ollama/chat.py +97 -0
  105. browser_use/llm/ollama/serializer.py +143 -0
  106. browser_use/llm/openai/chat.py +264 -0
  107. browser_use/llm/openai/like.py +15 -0
  108. browser_use/llm/openai/serializer.py +165 -0
  109. browser_use/llm/openrouter/chat.py +211 -0
  110. browser_use/llm/openrouter/serializer.py +26 -0
  111. browser_use/llm/schema.py +176 -0
  112. browser_use/llm/views.py +48 -0
  113. browser_use/logging_config.py +330 -0
  114. browser_use/mcp/__init__.py +18 -0
  115. browser_use/mcp/__main__.py +12 -0
  116. browser_use/mcp/client.py +544 -0
  117. browser_use/mcp/controller.py +264 -0
  118. browser_use/mcp/server.py +1114 -0
  119. browser_use/observability.py +204 -0
  120. browser_use/py.typed +0 -0
  121. browser_use/sandbox/__init__.py +41 -0
  122. browser_use/sandbox/sandbox.py +637 -0
  123. browser_use/sandbox/views.py +132 -0
  124. browser_use/screenshots/__init__.py +1 -0
  125. browser_use/screenshots/service.py +52 -0
  126. browser_use/sync/__init__.py +6 -0
  127. browser_use/sync/auth.py +357 -0
  128. browser_use/sync/service.py +161 -0
  129. browser_use/telemetry/__init__.py +51 -0
  130. browser_use/telemetry/service.py +112 -0
  131. browser_use/telemetry/views.py +101 -0
  132. browser_use/tokens/__init__.py +0 -0
  133. browser_use/tokens/custom_pricing.py +24 -0
  134. browser_use/tokens/mappings.py +4 -0
  135. browser_use/tokens/service.py +580 -0
  136. browser_use/tokens/views.py +108 -0
  137. browser_use/tools/registry/service.py +572 -0
  138. browser_use/tools/registry/views.py +174 -0
  139. browser_use/tools/service.py +1675 -0
  140. browser_use/tools/utils.py +82 -0
  141. browser_use/tools/views.py +100 -0
  142. browser_use/utils.py +670 -0
  143. optexity_browser_use-0.9.5.dist-info/METADATA +344 -0
  144. optexity_browser_use-0.9.5.dist-info/RECORD +147 -0
  145. optexity_browser_use-0.9.5.dist-info/WHEEL +4 -0
  146. optexity_browser_use-0.9.5.dist-info/entry_points.txt +3 -0
  147. optexity_browser_use-0.9.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,225 @@
1
+ """
2
+ Gmail API Service for Browser Use
3
+ Handles Gmail API authentication, email reading, and 2FA code extraction.
4
+ This service provides a clean interface for agents to interact with Gmail.
5
+ """
6
+
7
+ import base64
8
+ import logging
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ import anyio
14
+ from google.auth.transport.requests import Request
15
+ from google.oauth2.credentials import Credentials
16
+ from google_auth_oauthlib.flow import InstalledAppFlow
17
+ from googleapiclient.discovery import build
18
+ from googleapiclient.errors import HttpError
19
+
20
+ from browser_use.config import CONFIG
21
+
22
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
23
+
24
+
25
+ class GmailService:
26
+ """
27
+ Gmail API service for email reading.
28
+ Provides functionality to:
29
+ - Authenticate with Gmail API using OAuth2
30
+ - Read recent emails with filtering
31
+ - Return full email content for agent analysis
32
+ """
33
+
34
+ # Gmail API scopes
35
+ SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
36
+
37
+ def __init__(
38
+ self,
39
+ credentials_file: str | None = None,
40
+ token_file: str | None = None,
41
+ config_dir: str | None = None,
42
+ access_token: str | None = None,
43
+ ):
44
+ """
45
+ Initialize Gmail Service
46
+ Args:
47
+ credentials_file: Path to OAuth credentials JSON from Google Cloud Console
48
+ token_file: Path to store/load access tokens
49
+ config_dir: Directory to store config files (defaults to browser-use config directory)
50
+ access_token: Direct access token (skips file-based auth if provided)
51
+ """
52
+ # Set up configuration directory using browser-use's config system
53
+ if config_dir is None:
54
+ self.config_dir = CONFIG.BROWSER_USE_CONFIG_DIR
55
+ else:
56
+ self.config_dir = Path(config_dir).expanduser().resolve()
57
+
58
+ # Ensure config directory exists (only if not using direct token)
59
+ if access_token is None:
60
+ self.config_dir.mkdir(parents=True, exist_ok=True)
61
+
62
+ # Set up credential paths
63
+ self.credentials_file = credentials_file or self.config_dir / 'gmail_credentials.json'
64
+ self.token_file = token_file or self.config_dir / 'gmail_token.json'
65
+
66
+ # Direct access token support
67
+ self.access_token = access_token
68
+
69
+ self.service = None
70
+ self.creds = None
71
+ self._authenticated = False
72
+
73
+ def is_authenticated(self) -> bool:
74
+ """Check if Gmail service is authenticated"""
75
+ return self._authenticated and self.service is not None
76
+
77
+ async def authenticate(self) -> bool:
78
+ """
79
+ Handle OAuth authentication and token management
80
+ Returns:
81
+ bool: True if authentication successful, False otherwise
82
+ """
83
+ try:
84
+ logger.info('🔐 Authenticating with Gmail API...')
85
+
86
+ # Check if using direct access token
87
+ if self.access_token:
88
+ logger.info('🔑 Using provided access token')
89
+ # Create credentials from access token
90
+ self.creds = Credentials(token=self.access_token, scopes=self.SCOPES)
91
+ # Test token validity by building service
92
+ self.service = build('gmail', 'v1', credentials=self.creds)
93
+ self._authenticated = True
94
+ logger.info('✅ Gmail API ready with access token!')
95
+ return True
96
+
97
+ # Original file-based authentication flow
98
+ # Try to load existing tokens
99
+ if os.path.exists(self.token_file):
100
+ self.creds = Credentials.from_authorized_user_file(str(self.token_file), self.SCOPES)
101
+ logger.debug('📁 Loaded existing tokens')
102
+
103
+ # If no valid credentials, run OAuth flow
104
+ if not self.creds or not self.creds.valid:
105
+ if self.creds and self.creds.expired and self.creds.refresh_token:
106
+ logger.info('🔄 Refreshing expired tokens...')
107
+ self.creds.refresh(Request())
108
+ else:
109
+ logger.info('🌐 Starting OAuth flow...')
110
+ if not os.path.exists(self.credentials_file):
111
+ logger.error(
112
+ f'❌ Gmail credentials file not found: {self.credentials_file}\n'
113
+ 'Please download it from Google Cloud Console:\n'
114
+ '1. Go to https://console.cloud.google.com/\n'
115
+ '2. APIs & Services > Credentials\n'
116
+ '3. Download OAuth 2.0 Client JSON\n'
117
+ f"4. Save as 'gmail_credentials.json' in {self.config_dir}/"
118
+ )
119
+ return False
120
+
121
+ flow = InstalledAppFlow.from_client_secrets_file(str(self.credentials_file), self.SCOPES)
122
+ # Use specific redirect URI to match OAuth credentials
123
+ self.creds = flow.run_local_server(port=8080, open_browser=True)
124
+
125
+ # Save tokens for next time
126
+ await anyio.Path(self.token_file).write_text(self.creds.to_json())
127
+ logger.info(f'💾 Tokens saved to {self.token_file}')
128
+
129
+ # Build Gmail service
130
+ self.service = build('gmail', 'v1', credentials=self.creds)
131
+ self._authenticated = True
132
+ logger.info('✅ Gmail API ready!')
133
+ return True
134
+
135
+ except Exception as e:
136
+ logger.error(f'❌ Gmail authentication failed: {e}')
137
+ return False
138
+
139
+ async def get_recent_emails(self, max_results: int = 10, query: str = '', time_filter: str = '1h') -> list[dict[str, Any]]:
140
+ """
141
+ Get recent emails with optional query filter
142
+ Args:
143
+ max_results: Maximum number of emails to fetch
144
+ query: Gmail search query (e.g., 'from:noreply@example.com')
145
+ time_filter: Time filter (e.g., '5m', '1h', '1d')
146
+ Returns:
147
+ List of email dictionaries with parsed content
148
+ """
149
+ if not self.is_authenticated():
150
+ logger.error('❌ Gmail service not authenticated. Call authenticate() first.')
151
+ return []
152
+
153
+ try:
154
+ # Add time filter to query if provided
155
+ if time_filter and 'newer_than:' not in query:
156
+ query = f'newer_than:{time_filter} {query}'.strip()
157
+
158
+ logger.info(f'📧 Fetching {max_results} recent emails...')
159
+ if query:
160
+ logger.debug(f'🔍 Query: {query}')
161
+
162
+ # Get message list
163
+ assert self.service is not None
164
+ results = self.service.users().messages().list(userId='me', maxResults=max_results, q=query).execute()
165
+
166
+ messages = results.get('messages', [])
167
+ if not messages:
168
+ logger.info('📭 No messages found')
169
+ return []
170
+
171
+ logger.info(f'📨 Found {len(messages)} messages, fetching details...')
172
+
173
+ # Get full message details
174
+ emails = []
175
+ for i, message in enumerate(messages, 1):
176
+ logger.debug(f'📖 Reading email {i}/{len(messages)}...')
177
+
178
+ full_message = self.service.users().messages().get(userId='me', id=message['id'], format='full').execute()
179
+
180
+ email_data = self._parse_email(full_message)
181
+ emails.append(email_data)
182
+
183
+ return emails
184
+
185
+ except HttpError as error:
186
+ logger.error(f'❌ Gmail API error: {error}')
187
+ return []
188
+ except Exception as e:
189
+ logger.error(f'❌ Unexpected error fetching emails: {e}')
190
+ return []
191
+
192
+ def _parse_email(self, message: dict[str, Any]) -> dict[str, Any]:
193
+ """Parse Gmail message into readable format"""
194
+ headers = {h['name']: h['value'] for h in message['payload']['headers']}
195
+
196
+ return {
197
+ 'id': message['id'],
198
+ 'thread_id': message['threadId'],
199
+ 'subject': headers.get('Subject', ''),
200
+ 'from': headers.get('From', ''),
201
+ 'to': headers.get('To', ''),
202
+ 'date': headers.get('Date', ''),
203
+ 'timestamp': int(message['internalDate']),
204
+ 'body': self._extract_body(message['payload']),
205
+ 'raw_message': message,
206
+ }
207
+
208
+ def _extract_body(self, payload: dict[str, Any]) -> str:
209
+ """Extract email body from payload"""
210
+ body = ''
211
+
212
+ if payload.get('body', {}).get('data'):
213
+ # Simple email body
214
+ body = base64.urlsafe_b64decode(payload['body']['data']).decode('utf-8')
215
+ elif payload.get('parts'):
216
+ # Multi-part email
217
+ for part in payload['parts']:
218
+ if part['mimeType'] == 'text/plain' and part.get('body', {}).get('data'):
219
+ part_body = base64.urlsafe_b64decode(part['body']['data']).decode('utf-8')
220
+ body += part_body
221
+ elif part['mimeType'] == 'text/html' and not body and part.get('body', {}).get('data'):
222
+ # Fallback to HTML if no plain text
223
+ body = base64.urlsafe_b64decode(part['body']['data']).decode('utf-8')
224
+
225
+ return body
@@ -0,0 +1,155 @@
1
+ """
2
+ We have switched all of our code from langchain to openai.types.chat.chat_completion_message_param.
3
+
4
+ For an easier transition, this module re-exports the message types and chat model classes under short names.
5
+ """
6
+
7
+ from typing import TYPE_CHECKING
8
+
9
+ # Lightweight imports that are commonly used
10
+ from browser_use.llm.base import BaseChatModel
11
+ from browser_use.llm.messages import (
12
+ AssistantMessage,
13
+ BaseMessage,
14
+ SystemMessage,
15
+ UserMessage,
16
+ )
17
+ from browser_use.llm.messages import (
18
+ ContentPartImageParam as ContentImage,
19
+ )
20
+ from browser_use.llm.messages import (
21
+ ContentPartRefusalParam as ContentRefusal,
22
+ )
23
+ from browser_use.llm.messages import (
24
+ ContentPartTextParam as ContentText,
25
+ )
26
+
27
# Type stubs for lazy imports.
# Everything in this block is only evaluated by static type checkers (TYPE_CHECKING is
# False at runtime), so none of the provider modules below are imported when this
# package is loaded.
if TYPE_CHECKING:
    from browser_use.llm.anthropic.chat import ChatAnthropic
    from browser_use.llm.aws.chat_anthropic import ChatAnthropicBedrock
    from browser_use.llm.aws.chat_bedrock import ChatAWSBedrock
    from browser_use.llm.azure.chat import ChatAzureOpenAI
    from browser_use.llm.browser_use.chat import ChatBrowserUse
    from browser_use.llm.cerebras.chat import ChatCerebras
    from browser_use.llm.deepseek.chat import ChatDeepSeek
    from browser_use.llm.google.chat import ChatGoogle
    from browser_use.llm.groq.chat import ChatGroq
    from browser_use.llm.oci_raw.chat import ChatOCIRaw
    from browser_use.llm.ollama.chat import ChatOllama
    from browser_use.llm.openai.chat import ChatOpenAI
    from browser_use.llm.openrouter.chat import ChatOpenRouter

    # Type stubs for model instances - enables IDE autocomplete.
    # These are annotations only: no value is assigned here.
    openai_gpt_4o: ChatOpenAI
    openai_gpt_4o_mini: ChatOpenAI
    openai_gpt_4_1_mini: ChatOpenAI
    openai_o1: ChatOpenAI
    openai_o1_mini: ChatOpenAI
    openai_o1_pro: ChatOpenAI
    openai_o3: ChatOpenAI
    openai_o3_mini: ChatOpenAI
    openai_o3_pro: ChatOpenAI
    openai_o4_mini: ChatOpenAI
    openai_gpt_5: ChatOpenAI
    openai_gpt_5_mini: ChatOpenAI
    openai_gpt_5_nano: ChatOpenAI

    azure_gpt_4o: ChatAzureOpenAI
    azure_gpt_4o_mini: ChatAzureOpenAI
    azure_gpt_4_1_mini: ChatAzureOpenAI
    azure_o1: ChatAzureOpenAI
    azure_o1_mini: ChatAzureOpenAI
    azure_o1_pro: ChatAzureOpenAI
    azure_o3: ChatAzureOpenAI
    azure_o3_mini: ChatAzureOpenAI
    azure_o3_pro: ChatAzureOpenAI
    azure_gpt_5: ChatAzureOpenAI
    azure_gpt_5_mini: ChatAzureOpenAI

    google_gemini_2_0_flash: ChatGoogle
    google_gemini_2_0_pro: ChatGoogle
    google_gemini_2_5_pro: ChatGoogle
    google_gemini_2_5_flash: ChatGoogle
    google_gemini_2_5_flash_lite: ChatGoogle
75
+
76
+ # Models are imported on-demand via __getattr__
77
+
78
# Lazy imports mapping for heavy chat models.
# Maps each public class name to a (module path, attribute name) pair; the module-level
# __getattr__ imports the module on first access of the name.
_LAZY_IMPORTS = {
    'ChatAnthropic': ('browser_use.llm.anthropic.chat', 'ChatAnthropic'),
    'ChatAnthropicBedrock': ('browser_use.llm.aws.chat_anthropic', 'ChatAnthropicBedrock'),
    'ChatAWSBedrock': ('browser_use.llm.aws.chat_bedrock', 'ChatAWSBedrock'),
    'ChatAzureOpenAI': ('browser_use.llm.azure.chat', 'ChatAzureOpenAI'),
    'ChatBrowserUse': ('browser_use.llm.browser_use.chat', 'ChatBrowserUse'),
    'ChatCerebras': ('browser_use.llm.cerebras.chat', 'ChatCerebras'),
    'ChatDeepSeek': ('browser_use.llm.deepseek.chat', 'ChatDeepSeek'),
    'ChatGoogle': ('browser_use.llm.google.chat', 'ChatGoogle'),
    'ChatGroq': ('browser_use.llm.groq.chat', 'ChatGroq'),
    'ChatOCIRaw': ('browser_use.llm.oci_raw.chat', 'ChatOCIRaw'),
    'ChatOllama': ('browser_use.llm.ollama.chat', 'ChatOllama'),
    'ChatOpenAI': ('browser_use.llm.openai.chat', 'ChatOpenAI'),
    'ChatOpenRouter': ('browser_use.llm.openrouter.chat', 'ChatOpenRouter'),
}

# Cache for model instances - only created when accessed.
# Keyed by the attribute name requested from this module.
_model_cache: dict[str, 'BaseChatModel'] = {}
97
+
98
+
99
def __getattr__(name: str):
    """
    Resolve attributes of this module on first access.

    Lookup order:
    1. Chat model classes listed in ``_LAZY_IMPORTS`` are imported from their
       provider module and returned.
    2. Model instances already stored in ``_model_cache`` are returned as-is.
    3. Any other name is delegated to ``browser_use.llm.models.__getattr__``;
       a successful result is cached and returned.

    Raises:
        ImportError: when a provider module from ``_LAZY_IMPORTS`` fails to import.
        AttributeError: when the name cannot be resolved by any of the steps above.
    """
    lazy_target = _LAZY_IMPORTS.get(name)
    if lazy_target is not None:
        module_path, attr_name = lazy_target
        try:
            from importlib import import_module

            return getattr(import_module(module_path), attr_name)
        except ImportError as e:
            raise ImportError(f'Failed to import {name} from {module_path}: {e}') from e

    # Previously resolved model instances are served from the cache
    try:
        return _model_cache[name]
    except KeyError:
        pass

    # Ask the models module to build the instance on demand
    try:
        from browser_use.llm.models import __getattr__ as models_getattr

        instance = models_getattr(name)
    except (AttributeError, ImportError):
        # Not a known model instance either; fall through to the standard error below
        pass
    else:
        _model_cache[name] = instance
        return instance

    raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
128
+
129
+
130
# Public API of this package. The message and content-part names are imported eagerly
# at the top of the module; the Chat* class names (other than BaseChatModel) are keys
# of _LAZY_IMPORTS and are resolved through the module-level __getattr__.
__all__ = [
    # Message types -> for easier transition from langchain
    'BaseMessage',
    'UserMessage',
    'SystemMessage',
    'AssistantMessage',
    # Content parts with better names
    'ContentText',
    'ContentRefusal',
    'ContentImage',
    # Chat models
    'BaseChatModel',
    'ChatOpenAI',
    'ChatBrowserUse',
    'ChatDeepSeek',
    'ChatGoogle',
    'ChatAnthropic',
    'ChatAnthropicBedrock',
    'ChatAWSBedrock',
    'ChatGroq',
    'ChatAzureOpenAI',
    'ChatOCIRaw',
    'ChatOllama',
    'ChatOpenRouter',
    'ChatCerebras',
]
@@ -0,0 +1,242 @@
1
+ import json
2
+ from collections.abc import Mapping
3
+ from dataclasses import dataclass
4
+ from typing import Any, TypeVar, overload
5
+
6
+ import httpx
7
+ from anthropic import (
8
+ APIConnectionError,
9
+ APIStatusError,
10
+ AsyncAnthropic,
11
+ NotGiven,
12
+ RateLimitError,
13
+ omit,
14
+ )
15
+ from anthropic.types import CacheControlEphemeralParam, Message, ToolParam
16
+ from anthropic.types.model_param import ModelParam
17
+ from anthropic.types.text_block import TextBlock
18
+ from anthropic.types.tool_choice_tool_param import ToolChoiceToolParam
19
+ from httpx import Timeout
20
+ from pydantic import BaseModel
21
+
22
+ from browser_use.llm.anthropic.serializer import AnthropicMessageSerializer
23
+ from browser_use.llm.base import BaseChatModel
24
+ from browser_use.llm.exceptions import ModelProviderError, ModelRateLimitError
25
+ from browser_use.llm.messages import BaseMessage
26
+ from browser_use.llm.schema import SchemaOptimizer
27
+ from browser_use.llm.views import ChatInvokeCompletion, ChatInvokeUsage
28
+
29
# Type variable for structured-output models: bound to pydantic BaseModel subclasses.
T = TypeVar('T', bound=BaseModel)
30
+
31
+
32
@dataclass
class ChatAnthropic(BaseChatModel):
    """
    A wrapper around Anthropic's chat model.

    Plain-text completions are requested directly; structured output is
    obtained by forcing the model to call a single tool whose input schema is
    derived from the requested pydantic model.
    """

    # Model configuration
    model: str | ModelParam
    max_tokens: int = 8192
    temperature: float | None = None
    top_p: float | None = None
    seed: int | None = None

    # Client initialization parameters
    api_key: str | None = None
    auth_token: str | None = None
    base_url: str | httpx.URL | None = None
    timeout: float | Timeout | None | NotGiven = NotGiven()
    max_retries: int = 10
    default_headers: Mapping[str, str] | None = None
    default_query: Mapping[str, object] | None = None

    # Static
    @property
    def provider(self) -> str:
        """Provider identifier for this model wrapper."""
        return 'anthropic'

    def _get_client_params(self) -> dict[str, Any]:
        """
        Prepare client parameters dictionary.

        Returns:
            The constructor keyword arguments for ``AsyncAnthropic``, with
            every unset value (``None`` or a ``NotGiven`` sentinel) left out.
        """
        base_params = {
            'api_key': self.api_key,
            'auth_token': self.auth_token,
            'base_url': self.base_url,
            'timeout': self.timeout,
            'max_retries': self.max_retries,
            'default_headers': self.default_headers,
            'default_query': self.default_query,
        }

        # isinstance, not identity: ``NotGiven()`` builds a new object each time,
        # so ``v is not NotGiven()`` is always true and never filters anything.
        return {k: v for k, v in base_params.items() if v is not None and not isinstance(v, NotGiven)}

    def _get_client_params_for_invoke(self):
        """Prepare the sampling parameters passed to ``messages.create``; unset (``None``) values are omitted."""
        invoke_params = {
            'temperature': self.temperature,
            'max_tokens': self.max_tokens,
            'top_p': self.top_p,
            'seed': self.seed,
        }
        return {k: v for k, v in invoke_params.items() if v is not None}

    def get_client(self) -> AsyncAnthropic:
        """
        Returns an AsyncAnthropic client.

        Returns:
            AsyncAnthropic: A new client instance built from this object's
            client parameters.
        """
        client_params = self._get_client_params()
        return AsyncAnthropic(**client_params)

    @property
    def name(self) -> str:
        """The model identifier as a string."""
        return str(self.model)

    def _get_usage(self, response: Message) -> ChatInvokeUsage | None:
        """Convert the usage block of an Anthropic response into a ChatInvokeUsage."""
        usage = ChatInvokeUsage(
            # Anthropic reports cache-read tokens separately from input tokens,
            # so they are added back to obtain the full prompt size.
            prompt_tokens=response.usage.input_tokens + (response.usage.cache_read_input_tokens or 0),
            completion_tokens=response.usage.output_tokens,
            # NOTE(review): total_tokens does not include the cache-read tokens
            # that prompt_tokens counts - confirm this is intended.
            total_tokens=response.usage.input_tokens + response.usage.output_tokens,
            prompt_cached_tokens=response.usage.cache_read_input_tokens,
            prompt_cache_creation_tokens=response.usage.cache_creation_input_tokens,
            prompt_image_tokens=None,
        )
        return usage

    def _require_message(self, response: Any) -> Message:
        """Return ``response`` if it is a Message; otherwise raise ModelProviderError (status 502)."""
        if not isinstance(response, Message):
            raise ModelProviderError(
                message=f'Unexpected response type from Anthropic API: {type(response).__name__}. Response: {str(response)[:200]}',
                status_code=502,
                model=self.name,
            )
        return response

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: None = None) -> ChatInvokeCompletion[str]: ...

    @overload
    async def ainvoke(self, messages: list[BaseMessage], output_format: type[T]) -> ChatInvokeCompletion[T]: ...

    async def ainvoke(
        self, messages: list[BaseMessage], output_format: type[T] | None = None
    ) -> ChatInvokeCompletion[T] | ChatInvokeCompletion[str]:
        """
        Send ``messages`` to the model and return its completion.

        Args:
            messages: Conversation to send; serialized into Anthropic messages
                plus an optional system prompt.
            output_format: When given, the model is forced to call a tool whose
                schema is derived from this pydantic model, and the validated
                instance is returned as the completion. When ``None``, the text
                of the first content block is returned.

        Returns:
            A ChatInvokeCompletion carrying the completion, token usage and stop reason.

        Raises:
            ModelRateLimitError: when the API reports a rate limit.
            ModelProviderError: for connection errors, API status errors, an
                unexpected response type, or any other failure.
        """
        anthropic_messages, system_prompt = AnthropicMessageSerializer.serialize_messages(messages)

        try:
            if output_format is None:
                # Normal completion without structured output
                response = await self.get_client().messages.create(
                    model=self.model,
                    messages=anthropic_messages,
                    system=system_prompt or omit,
                    **self._get_client_params_for_invoke(),
                )

                # Ensure we have a valid Message object before accessing attributes
                response = self._require_message(response)
                usage = self._get_usage(response)

                # Extract text from the first content block
                first_content = response.content[0]
                if isinstance(first_content, TextBlock):
                    response_text = first_content.text
                else:
                    # If it's not a text block, convert to string
                    response_text = str(first_content)

                return ChatInvokeCompletion(
                    completion=response_text,
                    usage=usage,
                    stop_reason=response.stop_reason,
                )

            else:
                # Use tool calling for structured output:
                # create a tool that represents the output format
                tool_name = output_format.__name__
                schema = SchemaOptimizer.create_optimized_json_schema(output_format)

                # Remove title from schema if present (Anthropic doesn't like it in parameters)
                schema.pop('title', None)

                tool = ToolParam(
                    name=tool_name,
                    description=f'Extract information in the format of {tool_name}',
                    input_schema=schema,
                    cache_control=CacheControlEphemeralParam(type='ephemeral'),
                )

                # Force the model to use this tool
                tool_choice = ToolChoiceToolParam(type='tool', name=tool_name)

                response = await self.get_client().messages.create(
                    model=self.model,
                    messages=anthropic_messages,
                    tools=[tool],
                    system=system_prompt or omit,
                    tool_choice=tool_choice,
                    **self._get_client_params_for_invoke(),
                )

                # Ensure we have a valid Message object before accessing attributes
                response = self._require_message(response)
                usage = self._get_usage(response)

                # Extract the tool use block
                for content_block in response.content:
                    if hasattr(content_block, 'type') and content_block.type == 'tool_use':
                        # Parse the tool input as the structured output
                        try:
                            return ChatInvokeCompletion(
                                completion=output_format.model_validate(content_block.input),
                                usage=usage,
                                stop_reason=response.stop_reason,
                            )
                        except Exception:
                            # If validation fails and the input arrived as a JSON string,
                            # parse it first and validate the decoded data instead
                            if isinstance(content_block.input, str):
                                data = json.loads(content_block.input)
                                return ChatInvokeCompletion(
                                    completion=output_format.model_validate(data),
                                    usage=usage,
                                    stop_reason=response.stop_reason,
                                )
                            raise

                # If no tool use block found, raise an error
                raise ValueError('Expected tool use in response but none found')

        except ModelProviderError:
            # Already the library's own error type (e.g. the 502 raised for an unexpected
            # response); re-raise untouched so the generic handler does not re-wrap it.
            raise
        except APIConnectionError as e:
            raise ModelProviderError(message=e.message, model=self.name) from e
        except RateLimitError as e:
            raise ModelRateLimitError(message=e.message, model=self.name) from e
        except APIStatusError as e:
            raise ModelProviderError(message=e.message, status_code=e.status_code, model=self.name) from e
        except Exception as e:
            raise ModelProviderError(message=str(e), model=self.name) from e