camel-ai 0.2.73a1__py3-none-any.whl → 0.2.73a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +26 -1
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +101 -1101
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1177 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +46 -2
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +417 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +740 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +447 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +1994 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +227 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1002 -0
- camel/toolkits/slack_toolkit.py +38 -48
- {camel_ai-0.2.73a1.dist-info → camel_ai-0.2.73a2.dist-info}/METADATA +1 -1
- {camel_ai-0.2.73a1.dist-info → camel_ai-0.2.73a2.dist-info}/RECORD +19 -9
- {camel_ai-0.2.73a1.dist-info → camel_ai-0.2.73a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a1.dist-info → camel_ai-0.2.73a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -11,69 +11,26 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
-
# =========
|
|
15
14
|
|
|
16
|
-
import
|
|
17
|
-
from typing import Any, Callable, ClassVar, Dict, List, Optional, cast
|
|
15
|
+
from typing import Any, List, Literal, Optional
|
|
18
16
|
|
|
19
|
-
from camel.logger import get_logger
|
|
20
|
-
from camel.messages import BaseMessage
|
|
21
17
|
from camel.models import BaseModelBackend
|
|
22
|
-
from camel.toolkits.base import BaseToolkit
|
|
23
|
-
from camel.toolkits.function_tool import FunctionTool
|
|
24
|
-
from camel.utils.commons import dependencies_required
|
|
18
|
+
from camel.toolkits.base import BaseToolkit
|
|
25
19
|
|
|
26
|
-
from .config_loader import ConfigLoader
|
|
27
|
-
from .ws_wrapper import WebSocketBrowserWrapper
|
|
28
20
|
|
|
29
|
-
|
|
21
|
+
class HybridBrowserToolkit(BaseToolkit):
|
|
22
|
+
r"""A hybrid browser toolkit that can switch between TypeScript and Python
|
|
23
|
+
implementations.
|
|
30
24
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
automation with visual, screenshot-based capabilities.
|
|
35
|
-
|
|
36
|
-
This toolkit now uses TypeScript implementation with Playwright's
|
|
37
|
-
_snapshotForAI functionality for enhanced AI integration.
|
|
25
|
+
This wrapper allows users to choose between:
|
|
26
|
+
- 'typescript': WebSocket-based implementation using TypeScript/Node.js
|
|
27
|
+
- 'python': Pure Python implementation using Playwright directly
|
|
38
28
|
"""
|
|
39
29
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
"browser_open",
|
|
43
|
-
"browser_close",
|
|
44
|
-
"browser_visit_page",
|
|
45
|
-
"browser_back",
|
|
46
|
-
"browser_forward",
|
|
47
|
-
"browser_click",
|
|
48
|
-
"browser_type",
|
|
49
|
-
"browser_switch_tab",
|
|
50
|
-
]
|
|
51
|
-
|
|
52
|
-
# All available tools
|
|
53
|
-
ALL_TOOLS: ClassVar[List[str]] = [
|
|
54
|
-
"browser_open",
|
|
55
|
-
"browser_close",
|
|
56
|
-
"browser_visit_page",
|
|
57
|
-
"browser_back",
|
|
58
|
-
"browser_forward",
|
|
59
|
-
"browser_get_page_snapshot",
|
|
60
|
-
"browser_get_som_screenshot",
|
|
61
|
-
"browser_get_page_links",
|
|
62
|
-
"browser_click",
|
|
63
|
-
"browser_type",
|
|
64
|
-
"browser_select",
|
|
65
|
-
"browser_scroll",
|
|
66
|
-
"browser_enter",
|
|
67
|
-
"browser_wait_user",
|
|
68
|
-
"browser_solve_task",
|
|
69
|
-
"browser_switch_tab",
|
|
70
|
-
"browser_close_tab",
|
|
71
|
-
"browser_get_tab_info",
|
|
72
|
-
]
|
|
73
|
-
|
|
74
|
-
def __init__(
|
|
75
|
-
self,
|
|
30
|
+
def __new__(
|
|
31
|
+
cls,
|
|
76
32
|
*,
|
|
33
|
+
mode: Literal["typescript", "python"] = "typescript",
|
|
77
34
|
headless: bool = True,
|
|
78
35
|
user_data_dir: Optional[str] = None,
|
|
79
36
|
stealth: bool = False,
|
|
@@ -93,1085 +50,128 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
93
50
|
viewport_limit: bool = False,
|
|
94
51
|
connect_over_cdp: bool = False,
|
|
95
52
|
cdp_url: Optional[str] = None,
|
|
96
|
-
|
|
97
|
-
|
|
53
|
+
**kwargs: Any,
|
|
54
|
+
) -> Any:
|
|
55
|
+
r"""Create a HybridBrowserToolkit instance with the specified mode.
|
|
98
56
|
|
|
99
57
|
Args:
|
|
58
|
+
mode (Literal["typescript", "python"]): Implementation mode.
|
|
59
|
+
- 'typescript': Uses WebSocket-based TypeScript implementation
|
|
60
|
+
- 'python': Uses pure Python Playwright implementation
|
|
61
|
+
Defaults to "typescript".
|
|
100
62
|
headless (bool): Whether to run browser in headless mode.
|
|
101
|
-
|
|
63
|
+
Defaults to True.
|
|
102
64
|
user_data_dir (Optional[str]): Directory for user data
|
|
103
|
-
|
|
65
|
+
persistence. Defaults to None.
|
|
104
66
|
stealth (bool): Whether to enable stealth mode. Defaults to
|
|
105
|
-
|
|
67
|
+
False.
|
|
106
68
|
web_agent_model (Optional[BaseModelBackend]): Model for web
|
|
107
|
-
|
|
69
|
+
agent operations. Defaults to None.
|
|
108
70
|
cache_dir (str): Directory for caching. Defaults to "tmp/".
|
|
109
71
|
enabled_tools (Optional[List[str]]): List of enabled tools.
|
|
110
|
-
|
|
72
|
+
Defaults to None.
|
|
111
73
|
browser_log_to_file (bool): Whether to log browser actions to
|
|
112
|
-
|
|
74
|
+
file. Defaults to False.
|
|
113
75
|
session_id (Optional[str]): Session identifier. Defaults to None.
|
|
114
76
|
default_start_url (str): Default URL to start with. Defaults
|
|
115
|
-
|
|
77
|
+
to "https://google.com/".
|
|
116
78
|
default_timeout (Optional[int]): Default timeout in
|
|
117
|
-
|
|
79
|
+
milliseconds. Defaults to None.
|
|
118
80
|
short_timeout (Optional[int]): Short timeout in milliseconds.
|
|
119
|
-
|
|
81
|
+
Defaults to None.
|
|
120
82
|
navigation_timeout (Optional[int]): Navigation timeout in
|
|
121
|
-
|
|
83
|
+
milliseconds. Defaults to None.
|
|
122
84
|
network_idle_timeout (Optional[int]): Network idle timeout in
|
|
123
|
-
|
|
85
|
+
milliseconds. Defaults to None.
|
|
124
86
|
screenshot_timeout (Optional[int]): Screenshot timeout in
|
|
125
|
-
|
|
87
|
+
milliseconds. Defaults to None.
|
|
126
88
|
page_stability_timeout (Optional[int]): Page stability timeout
|
|
127
|
-
|
|
89
|
+
in milliseconds. Defaults to None.
|
|
128
90
|
dom_content_loaded_timeout (Optional[int]): DOM content loaded
|
|
129
|
-
|
|
91
|
+
timeout in milliseconds. Defaults to None.
|
|
130
92
|
viewport_limit (bool): Whether to filter page snapshot
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
bounds will be included in snapshots.
|
|
134
|
-
When False (default), all elements on the page are
|
|
135
|
-
included. Defaults to False.
|
|
93
|
+
elements to only those visible in the current viewport.
|
|
94
|
+
Defaults to False.
|
|
136
95
|
connect_over_cdp (bool): Whether to connect to an existing
|
|
137
|
-
|
|
96
|
+
browser via Chrome DevTools Protocol. Defaults to False.
|
|
97
|
+
(Only supported in TypeScript mode)
|
|
138
98
|
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
RegisteredAgentToolkit.__init__(self)
|
|
144
|
-
|
|
145
|
-
# Initialize configuration loader
|
|
146
|
-
self.config_loader = ConfigLoader.from_kwargs(
|
|
147
|
-
headless=headless,
|
|
148
|
-
user_data_dir=user_data_dir,
|
|
149
|
-
stealth=stealth,
|
|
150
|
-
default_start_url=default_start_url,
|
|
151
|
-
default_timeout=default_timeout,
|
|
152
|
-
short_timeout=short_timeout,
|
|
153
|
-
navigation_timeout=navigation_timeout,
|
|
154
|
-
network_idle_timeout=network_idle_timeout,
|
|
155
|
-
screenshot_timeout=screenshot_timeout,
|
|
156
|
-
page_stability_timeout=page_stability_timeout,
|
|
157
|
-
dom_content_loaded_timeout=dom_content_loaded_timeout,
|
|
158
|
-
viewport_limit=viewport_limit,
|
|
159
|
-
cache_dir=cache_dir,
|
|
160
|
-
browser_log_to_file=browser_log_to_file,
|
|
161
|
-
session_id=session_id,
|
|
162
|
-
enabled_tools=enabled_tools,
|
|
163
|
-
connect_over_cdp=connect_over_cdp,
|
|
164
|
-
cdp_url=cdp_url,
|
|
165
|
-
)
|
|
166
|
-
|
|
167
|
-
# Legacy attribute access for backward compatibility
|
|
168
|
-
browser_config = self.config_loader.get_browser_config()
|
|
169
|
-
toolkit_config = self.config_loader.get_toolkit_config()
|
|
170
|
-
|
|
171
|
-
self._headless = browser_config.headless
|
|
172
|
-
self._user_data_dir = browser_config.user_data_dir
|
|
173
|
-
self._stealth = browser_config.stealth
|
|
174
|
-
self._web_agent_model = web_agent_model
|
|
175
|
-
self._cache_dir = toolkit_config.cache_dir
|
|
176
|
-
self._browser_log_to_file = toolkit_config.browser_log_to_file
|
|
177
|
-
self._default_start_url = browser_config.default_start_url
|
|
178
|
-
self._session_id = toolkit_config.session_id or "default"
|
|
179
|
-
self._viewport_limit = browser_config.viewport_limit
|
|
180
|
-
|
|
181
|
-
# Store timeout configuration for backward compatibility
|
|
182
|
-
self._default_timeout = browser_config.default_timeout
|
|
183
|
-
self._short_timeout = browser_config.short_timeout
|
|
184
|
-
self._navigation_timeout = browser_config.navigation_timeout
|
|
185
|
-
self._network_idle_timeout = browser_config.network_idle_timeout
|
|
186
|
-
self._screenshot_timeout = browser_config.screenshot_timeout
|
|
187
|
-
self._page_stability_timeout = browser_config.page_stability_timeout
|
|
188
|
-
self._dom_content_loaded_timeout = (
|
|
189
|
-
browser_config.dom_content_loaded_timeout
|
|
190
|
-
)
|
|
191
|
-
|
|
192
|
-
# Configure enabled tools
|
|
193
|
-
if enabled_tools is None:
|
|
194
|
-
self.enabled_tools = self.DEFAULT_TOOLS.copy()
|
|
195
|
-
else:
|
|
196
|
-
# Validate enabled tools
|
|
197
|
-
invalid_tools = [
|
|
198
|
-
tool for tool in enabled_tools if tool not in self.ALL_TOOLS
|
|
199
|
-
]
|
|
200
|
-
if invalid_tools:
|
|
201
|
-
raise ValueError(
|
|
202
|
-
f"Invalid tools specified: {invalid_tools}. "
|
|
203
|
-
f"Available tools: {self.ALL_TOOLS}"
|
|
204
|
-
)
|
|
205
|
-
self.enabled_tools = enabled_tools.copy()
|
|
206
|
-
|
|
207
|
-
logger.info(f"Enabled tools: {self.enabled_tools}")
|
|
208
|
-
|
|
209
|
-
# Initialize WebSocket wrapper
|
|
210
|
-
self._ws_wrapper: Optional[WebSocketBrowserWrapper] = None
|
|
211
|
-
self._ws_config = self.config_loader.to_ws_config()
|
|
212
|
-
|
|
213
|
-
async def _ensure_ws_wrapper(self):
|
|
214
|
-
"""Ensure WebSocket wrapper is initialized."""
|
|
215
|
-
if self._ws_wrapper is None:
|
|
216
|
-
self._ws_wrapper = WebSocketBrowserWrapper(self._ws_config)
|
|
217
|
-
await self._ws_wrapper.start()
|
|
218
|
-
|
|
219
|
-
async def _get_ws_wrapper(self) -> WebSocketBrowserWrapper:
|
|
220
|
-
"""Get the WebSocket wrapper, initializing if needed."""
|
|
221
|
-
await self._ensure_ws_wrapper()
|
|
222
|
-
if self._ws_wrapper is None:
|
|
223
|
-
raise RuntimeError("Failed to initialize WebSocket wrapper")
|
|
224
|
-
return self._ws_wrapper
|
|
225
|
-
|
|
226
|
-
def __del__(self):
|
|
227
|
-
r"""Cleanup browser resources on garbage collection."""
|
|
228
|
-
try:
|
|
229
|
-
import sys
|
|
230
|
-
|
|
231
|
-
if getattr(sys, "is_finalizing", lambda: False)():
|
|
232
|
-
return
|
|
233
|
-
|
|
234
|
-
import asyncio
|
|
235
|
-
|
|
236
|
-
try:
|
|
237
|
-
loop = asyncio.get_event_loop()
|
|
238
|
-
if not loop.is_closed() and not loop.is_running():
|
|
239
|
-
try:
|
|
240
|
-
loop.run_until_complete(
|
|
241
|
-
asyncio.wait_for(self.browser_close(), timeout=2.0)
|
|
242
|
-
)
|
|
243
|
-
except asyncio.TimeoutError:
|
|
244
|
-
pass
|
|
245
|
-
except (RuntimeError, ImportError):
|
|
246
|
-
pass
|
|
247
|
-
except Exception:
|
|
248
|
-
pass
|
|
249
|
-
|
|
250
|
-
@property
|
|
251
|
-
def web_agent_model(self) -> Optional[BaseModelBackend]:
|
|
252
|
-
"""Get the web agent model."""
|
|
253
|
-
return self._web_agent_model
|
|
254
|
-
|
|
255
|
-
@web_agent_model.setter
|
|
256
|
-
def web_agent_model(self, value: Optional[BaseModelBackend]) -> None:
|
|
257
|
-
"""Set the web agent model."""
|
|
258
|
-
self._web_agent_model = value
|
|
259
|
-
|
|
260
|
-
@property
|
|
261
|
-
def cache_dir(self) -> str:
|
|
262
|
-
"""Get the cache directory."""
|
|
263
|
-
return self._cache_dir
|
|
264
|
-
|
|
265
|
-
# Public API Methods
|
|
266
|
-
|
|
267
|
-
async def browser_open(self) -> Dict[str, Any]:
|
|
268
|
-
r"""Starts a new browser session. This must be the first browser
|
|
269
|
-
action.
|
|
270
|
-
|
|
271
|
-
This method initializes the browser and navigates to a default start
|
|
272
|
-
page. To visit a specific URL, use `visit_page` after this.
|
|
273
|
-
|
|
274
|
-
Returns:
|
|
275
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
276
|
-
- "result" (str): Confirmation of the action.
|
|
277
|
-
- "snapshot" (str): A textual snapshot of interactive
|
|
278
|
-
elements.
|
|
279
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
280
|
-
- "current_tab" (int): Index of the active tab.
|
|
281
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
282
|
-
"""
|
|
283
|
-
try:
|
|
284
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
285
|
-
result = await ws_wrapper.open_browser(self._default_start_url)
|
|
286
|
-
|
|
287
|
-
# Add tab information
|
|
288
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
289
|
-
result.update(
|
|
290
|
-
{
|
|
291
|
-
"tabs": tab_info,
|
|
292
|
-
"current_tab": next(
|
|
293
|
-
(
|
|
294
|
-
i
|
|
295
|
-
for i, tab in enumerate(tab_info)
|
|
296
|
-
if tab.get("is_current")
|
|
297
|
-
),
|
|
298
|
-
0,
|
|
299
|
-
),
|
|
300
|
-
"total_tabs": len(tab_info),
|
|
301
|
-
}
|
|
302
|
-
)
|
|
303
|
-
|
|
304
|
-
return result
|
|
305
|
-
except Exception as e:
|
|
306
|
-
logger.error(f"Failed to open browser: {e}")
|
|
307
|
-
return {
|
|
308
|
-
"result": f"Error opening browser: {e}",
|
|
309
|
-
"snapshot": "",
|
|
310
|
-
"tabs": [],
|
|
311
|
-
"current_tab": 0,
|
|
312
|
-
"total_tabs": 0,
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
async def browser_close(self) -> str:
|
|
316
|
-
r"""Closes the browser session, releasing all resources.
|
|
317
|
-
|
|
318
|
-
This should be called at the end of a task for cleanup.
|
|
99
|
+
connection. Required when connect_over_cdp is True.
|
|
100
|
+
Defaults to None. (Only supported in TypeScript mode)
|
|
101
|
+
**kwargs: Additional keyword arguments passed to the
|
|
102
|
+
implementation.
|
|
319
103
|
|
|
320
104
|
Returns:
|
|
321
|
-
|
|
105
|
+
HybridBrowserToolkit instance of the specified implementation.
|
|
322
106
|
"""
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
self._ws_wrapper = None
|
|
327
|
-
return "Browser session closed."
|
|
328
|
-
except Exception as e:
|
|
329
|
-
logger.error(f"Failed to close browser: {e}")
|
|
330
|
-
return f"Error closing browser: {e}"
|
|
331
|
-
|
|
332
|
-
async def browser_visit_page(self, url: str) -> Dict[str, Any]:
|
|
333
|
-
r"""Opens a URL in a new browser tab and switches to it.
|
|
334
|
-
|
|
335
|
-
Args:
|
|
336
|
-
url (str): The web address to load. This should be a valid and
|
|
337
|
-
existing URL.
|
|
338
|
-
|
|
339
|
-
Returns:
|
|
340
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
341
|
-
- "result" (str): Confirmation of the action.
|
|
342
|
-
- "snapshot" (str): A textual snapshot of the new page.
|
|
343
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
344
|
-
- "current_tab" (int): Index of the new active tab.
|
|
345
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
346
|
-
"""
|
|
347
|
-
try:
|
|
348
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
349
|
-
result = await ws_wrapper.visit_page(url)
|
|
350
|
-
|
|
351
|
-
# Add tab information
|
|
352
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
353
|
-
result.update(
|
|
354
|
-
{
|
|
355
|
-
"tabs": tab_info,
|
|
356
|
-
"current_tab": next(
|
|
357
|
-
(
|
|
358
|
-
i
|
|
359
|
-
for i, tab in enumerate(tab_info)
|
|
360
|
-
if tab.get("is_current")
|
|
361
|
-
),
|
|
362
|
-
0,
|
|
363
|
-
),
|
|
364
|
-
"total_tabs": len(tab_info),
|
|
365
|
-
}
|
|
107
|
+
if mode == "typescript":
|
|
108
|
+
from .hybrid_browser_toolkit_ts import (
|
|
109
|
+
HybridBrowserToolkit as TSToolkit,
|
|
366
110
|
)
|
|
367
111
|
|
|
368
|
-
return
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
390
|
-
- "current_tab" (int): Index of the active tab.
|
|
391
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
392
|
-
"""
|
|
393
|
-
try:
|
|
394
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
395
|
-
result = await ws_wrapper.back()
|
|
396
|
-
|
|
397
|
-
# Add tab information
|
|
398
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
399
|
-
result.update(
|
|
400
|
-
{
|
|
401
|
-
"tabs": tab_info,
|
|
402
|
-
"current_tab": next(
|
|
403
|
-
(
|
|
404
|
-
i
|
|
405
|
-
for i, tab in enumerate(tab_info)
|
|
406
|
-
if tab.get("is_current")
|
|
407
|
-
),
|
|
408
|
-
0,
|
|
409
|
-
),
|
|
410
|
-
"total_tabs": len(tab_info),
|
|
411
|
-
}
|
|
112
|
+
return TSToolkit(
|
|
113
|
+
headless=headless,
|
|
114
|
+
user_data_dir=user_data_dir,
|
|
115
|
+
stealth=stealth,
|
|
116
|
+
web_agent_model=web_agent_model,
|
|
117
|
+
cache_dir=cache_dir,
|
|
118
|
+
enabled_tools=enabled_tools,
|
|
119
|
+
browser_log_to_file=browser_log_to_file,
|
|
120
|
+
session_id=session_id,
|
|
121
|
+
default_start_url=default_start_url,
|
|
122
|
+
default_timeout=default_timeout,
|
|
123
|
+
short_timeout=short_timeout,
|
|
124
|
+
navigation_timeout=navigation_timeout,
|
|
125
|
+
network_idle_timeout=network_idle_timeout,
|
|
126
|
+
screenshot_timeout=screenshot_timeout,
|
|
127
|
+
page_stability_timeout=page_stability_timeout,
|
|
128
|
+
dom_content_loaded_timeout=dom_content_loaded_timeout,
|
|
129
|
+
viewport_limit=viewport_limit,
|
|
130
|
+
connect_over_cdp=connect_over_cdp,
|
|
131
|
+
cdp_url=cdp_url,
|
|
132
|
+
**kwargs,
|
|
412
133
|
)
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
logger.error(f"Failed to navigate back: {e}")
|
|
417
|
-
return {
|
|
418
|
-
"result": f"Error navigating back: {e}",
|
|
419
|
-
"snapshot": "",
|
|
420
|
-
"tabs": [],
|
|
421
|
-
"current_tab": 0,
|
|
422
|
-
"total_tabs": 0,
|
|
423
|
-
}
|
|
424
|
-
|
|
425
|
-
async def browser_forward(self) -> Dict[str, Any]:
|
|
426
|
-
r"""Goes forward to the next page in the browser history.
|
|
427
|
-
|
|
428
|
-
This action simulates using the browser's "forward" button in the
|
|
429
|
-
currently active tab.
|
|
430
|
-
|
|
431
|
-
Returns:
|
|
432
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
433
|
-
- "result" (str): Confirmation of the action.
|
|
434
|
-
- "snapshot" (str): A textual snapshot of the next page.
|
|
435
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
436
|
-
- "current_tab" (int): Index of the active tab.
|
|
437
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
438
|
-
"""
|
|
439
|
-
try:
|
|
440
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
441
|
-
result = await ws_wrapper.forward()
|
|
442
|
-
|
|
443
|
-
# Add tab information
|
|
444
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
445
|
-
result.update(
|
|
446
|
-
{
|
|
447
|
-
"tabs": tab_info,
|
|
448
|
-
"current_tab": next(
|
|
449
|
-
(
|
|
450
|
-
i
|
|
451
|
-
for i, tab in enumerate(tab_info)
|
|
452
|
-
if tab.get("is_current")
|
|
453
|
-
),
|
|
454
|
-
0,
|
|
455
|
-
),
|
|
456
|
-
"total_tabs": len(tab_info),
|
|
457
|
-
}
|
|
134
|
+
elif mode == "python":
|
|
135
|
+
from ..hybrid_browser_toolkit_py import (
|
|
136
|
+
HybridBrowserToolkit as PyToolkit,
|
|
458
137
|
)
|
|
459
138
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
"result": f"Error navigating forward: {e}",
|
|
465
|
-
"snapshot": "",
|
|
466
|
-
"tabs": [],
|
|
467
|
-
"current_tab": 0,
|
|
468
|
-
"total_tabs": 0,
|
|
469
|
-
}
|
|
470
|
-
|
|
471
|
-
async def browser_get_page_snapshot(self) -> str:
|
|
472
|
-
r"""Gets a textual snapshot of the page's interactive elements.
|
|
473
|
-
|
|
474
|
-
The snapshot lists elements like buttons, links, and inputs,
|
|
475
|
-
each with
|
|
476
|
-
a unique `ref` ID. This ID is used by other tools (e.g., `click`,
|
|
477
|
-
`type`) to interact with a specific element. This tool provides no
|
|
478
|
-
visual information.
|
|
479
|
-
|
|
480
|
-
If viewport_limit is enabled, only elements within the current
|
|
481
|
-
viewport
|
|
482
|
-
will be included in the snapshot.
|
|
483
|
-
|
|
484
|
-
Returns:
|
|
485
|
-
str: A formatted string representing the interactive elements and
|
|
486
|
-
their `ref` IDs. For example:
|
|
487
|
-
'- link "Sign In" [ref=1]'
|
|
488
|
-
'- textbox "Username" [ref=2]'
|
|
489
|
-
"""
|
|
490
|
-
try:
|
|
491
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
492
|
-
return await ws_wrapper.get_page_snapshot(self._viewport_limit)
|
|
493
|
-
except Exception as e:
|
|
494
|
-
logger.error(f"Failed to get page snapshot: {e}")
|
|
495
|
-
return f"Error capturing snapshot: {e}"
|
|
496
|
-
|
|
497
|
-
@dependencies_required('PIL')
|
|
498
|
-
async def browser_get_som_screenshot(
|
|
499
|
-
self,
|
|
500
|
-
read_image: bool = True,
|
|
501
|
-
instruction: Optional[str] = None,
|
|
502
|
-
) -> str:
|
|
503
|
-
r"""Captures a screenshot with interactive elements highlighted.
|
|
504
|
-
|
|
505
|
-
"SoM" stands for "Set of Marks". This tool takes a screenshot and
|
|
506
|
-
draws
|
|
507
|
-
boxes around clickable elements, overlaying a `ref` ID on each. Use
|
|
508
|
-
this for a visual understanding of the page, especially when the
|
|
509
|
-
textual snapshot is not enough.
|
|
510
|
-
|
|
511
|
-
Args:
|
|
512
|
-
read_image (bool, optional): If `True`, the agent will analyze
|
|
513
|
-
the screenshot. Requires agent to be registered.
|
|
514
|
-
(default: :obj:`True`)
|
|
515
|
-
instruction (Optional[str], optional): A specific question or
|
|
516
|
-
command for the agent regarding the screenshot, used only if
|
|
517
|
-
`read_image` is `True`. For example: "Find the login button."
|
|
518
|
-
|
|
519
|
-
Returns:
|
|
520
|
-
str: A confirmation message indicating the screenshot was
|
|
521
|
-
captured, the file path where it was saved, and optionally the
|
|
522
|
-
agent's analysis if `read_image` is `True`.
|
|
523
|
-
"""
|
|
524
|
-
import base64
|
|
525
|
-
import datetime
|
|
526
|
-
import os
|
|
527
|
-
import urllib.parse
|
|
528
|
-
|
|
529
|
-
from camel.utils import sanitize_filename
|
|
530
|
-
|
|
531
|
-
try:
|
|
532
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
533
|
-
result = await ws_wrapper.get_som_screenshot()
|
|
534
|
-
|
|
535
|
-
# Initialize result text
|
|
536
|
-
result_text = result.text
|
|
537
|
-
file_path = None
|
|
538
|
-
|
|
539
|
-
# Save screenshot to cache directory if images are available
|
|
540
|
-
if result.images:
|
|
541
|
-
# Ensure cache directory exists (use absolute path)
|
|
542
|
-
cache_dir = os.path.abspath(self._cache_dir)
|
|
543
|
-
os.makedirs(cache_dir, exist_ok=True)
|
|
544
|
-
|
|
545
|
-
# Get current page URL for filename
|
|
546
|
-
try:
|
|
547
|
-
# Try to get the current page URL from the wrapper
|
|
548
|
-
page_info = await ws_wrapper.get_tab_info()
|
|
549
|
-
current_tab = next(
|
|
550
|
-
(tab for tab in page_info if tab.get('is_current')),
|
|
551
|
-
None,
|
|
552
|
-
)
|
|
553
|
-
url = current_tab['url'] if current_tab else 'unknown'
|
|
554
|
-
except Exception:
|
|
555
|
-
url = 'unknown'
|
|
556
|
-
|
|
557
|
-
# Generate filename
|
|
558
|
-
parsed_url = urllib.parse.urlparse(url)
|
|
559
|
-
url_name = sanitize_filename(
|
|
560
|
-
str(parsed_url.path) or 'homepage', max_length=241
|
|
561
|
-
)
|
|
562
|
-
timestamp = datetime.datetime.now().strftime("%m%d%H%M%S")
|
|
563
|
-
file_path = os.path.join(
|
|
564
|
-
cache_dir, f"{url_name}_{timestamp}_som.png"
|
|
139
|
+
# Note: Python implementation doesn't support CDP connection
|
|
140
|
+
if connect_over_cdp:
|
|
141
|
+
raise ValueError(
|
|
142
|
+
"CDP connection is only supported in TypeScript mode"
|
|
565
143
|
)
|
|
566
144
|
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
# Remove data URL prefix
|
|
571
|
-
base64_data = image_data.split(',', 1)[1]
|
|
572
|
-
|
|
573
|
-
# Decode and save
|
|
574
|
-
image_bytes = base64.b64decode(base64_data)
|
|
575
|
-
with open(file_path, 'wb') as f:
|
|
576
|
-
f.write(image_bytes)
|
|
577
|
-
|
|
578
|
-
logger.info(f"Screenshot saved to: {file_path}")
|
|
579
|
-
|
|
580
|
-
# Update result text to include file path
|
|
581
|
-
result_text += f" (saved to: {file_path})"
|
|
582
|
-
break
|
|
583
|
-
|
|
584
|
-
# Analyze image if requested and agent is registered
|
|
585
|
-
if read_image and file_path:
|
|
586
|
-
if self.agent is None:
|
|
587
|
-
logger.error(
|
|
588
|
-
"Cannot analyze screenshot: No agent registered. "
|
|
589
|
-
"Please pass this toolkit to ChatAgent via "
|
|
590
|
-
"toolkits_to_register_agent parameter."
|
|
591
|
-
)
|
|
592
|
-
result_text += (
|
|
593
|
-
" Error: No agent registered for image analysis. "
|
|
594
|
-
"Please pass this toolkit to ChatAgent via "
|
|
595
|
-
"toolkits_to_register_agent parameter."
|
|
596
|
-
)
|
|
597
|
-
else:
|
|
598
|
-
try:
|
|
599
|
-
# Load the image and create a message
|
|
600
|
-
from PIL import Image
|
|
601
|
-
|
|
602
|
-
img = Image.open(file_path)
|
|
603
|
-
inst = instruction if instruction is not None else ""
|
|
604
|
-
message = BaseMessage.make_user_message(
|
|
605
|
-
role_name="User",
|
|
606
|
-
content=inst,
|
|
607
|
-
image_list=[img],
|
|
608
|
-
)
|
|
609
|
-
|
|
610
|
-
# Get agent's analysis
|
|
611
|
-
response = await self.agent.astep(message)
|
|
612
|
-
agent_response = response.msgs[0].content
|
|
613
|
-
result_text += f". Agent analysis: {agent_response}"
|
|
614
|
-
except Exception as e:
|
|
615
|
-
logger.error(f"Error analyzing screenshot: {e}")
|
|
616
|
-
result_text += f". Error analyzing screenshot: {e}"
|
|
617
|
-
|
|
618
|
-
return result_text
|
|
619
|
-
except Exception as e:
|
|
620
|
-
logger.error(f"Failed to get screenshot: {e}")
|
|
621
|
-
return f"Error capturing screenshot: {e}"
|
|
622
|
-
|
|
623
|
-
async def browser_click(self, *, ref: str) -> Dict[str, Any]:
|
|
624
|
-
r"""Performs a click on an element on the page.
|
|
625
|
-
|
|
626
|
-
Args:
|
|
627
|
-
ref (str): The `ref` ID of the element to click. This ID is
|
|
628
|
-
obtained from a page snapshot (`get_page_snapshot` or
|
|
629
|
-
`get_som_screenshot`).
|
|
630
|
-
|
|
631
|
-
Returns:
|
|
632
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
633
|
-
- "result" (str): Confirmation of the action.
|
|
634
|
-
- "snapshot" (str): A textual snapshot of the page after the
|
|
635
|
-
click.
|
|
636
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
637
|
-
- "current_tab" (int): Index of the active tab.
|
|
638
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
639
|
-
"""
|
|
640
|
-
try:
|
|
641
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
642
|
-
result = await ws_wrapper.click(ref)
|
|
643
|
-
|
|
644
|
-
# Add tab information
|
|
645
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
646
|
-
result.update(
|
|
647
|
-
{
|
|
648
|
-
"tabs": tab_info,
|
|
649
|
-
"current_tab": next(
|
|
650
|
-
(
|
|
651
|
-
i
|
|
652
|
-
for i, tab in enumerate(tab_info)
|
|
653
|
-
if tab.get("is_current")
|
|
654
|
-
),
|
|
655
|
-
0,
|
|
656
|
-
),
|
|
657
|
-
"total_tabs": len(tab_info),
|
|
658
|
-
}
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
-
return result
|
|
662
|
-
except Exception as e:
|
|
663
|
-
logger.error(f"Failed to click element: {e}")
|
|
664
|
-
return {
|
|
665
|
-
"result": f"Error clicking element: {e}",
|
|
666
|
-
"snapshot": "",
|
|
667
|
-
"tabs": [],
|
|
668
|
-
"current_tab": 0,
|
|
669
|
-
"total_tabs": 0,
|
|
670
|
-
}
|
|
671
|
-
|
|
672
|
-
async def browser_type(self, *, ref: str, text: str) -> Dict[str, Any]:
|
|
673
|
-
r"""Types text into an input element on the page.
|
|
674
|
-
|
|
675
|
-
Args:
|
|
676
|
-
ref (str): The `ref` ID of the input element, from a snapshot.
|
|
677
|
-
text (str): The text to type into the element.
|
|
678
|
-
|
|
679
|
-
Returns:
|
|
680
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
681
|
-
- "result" (str): Confirmation of the action.
|
|
682
|
-
- "snapshot" (str): A textual snapshot of the page after
|
|
683
|
-
typing.
|
|
684
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
685
|
-
- "current_tab" (int): Index of the active tab.
|
|
686
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
687
|
-
"""
|
|
688
|
-
try:
|
|
689
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
690
|
-
result = await ws_wrapper.type(ref, text)
|
|
691
|
-
|
|
692
|
-
# Add tab information
|
|
693
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
694
|
-
result.update(
|
|
695
|
-
{
|
|
696
|
-
"tabs": tab_info,
|
|
697
|
-
"current_tab": next(
|
|
698
|
-
(
|
|
699
|
-
i
|
|
700
|
-
for i, tab in enumerate(tab_info)
|
|
701
|
-
if tab.get("is_current")
|
|
702
|
-
),
|
|
703
|
-
0,
|
|
704
|
-
),
|
|
705
|
-
"total_tabs": len(tab_info),
|
|
706
|
-
}
|
|
707
|
-
)
|
|
708
|
-
|
|
709
|
-
return result
|
|
710
|
-
except Exception as e:
|
|
711
|
-
logger.error(f"Failed to type text: {e}")
|
|
712
|
-
return {
|
|
713
|
-
"result": f"Error typing text: {e}",
|
|
714
|
-
"snapshot": "",
|
|
715
|
-
"tabs": [],
|
|
716
|
-
"current_tab": 0,
|
|
717
|
-
"total_tabs": 0,
|
|
718
|
-
}
|
|
719
|
-
|
|
720
|
-
async def browser_select(self, *, ref: str, value: str) -> Dict[str, Any]:
|
|
721
|
-
r"""Selects an option in a dropdown (`<select>`) element.
|
|
722
|
-
|
|
723
|
-
Args:
|
|
724
|
-
ref (str): The `ref` ID of the `<select>` element.
|
|
725
|
-
value (str): The `value` attribute of the `<option>` to select,
|
|
726
|
-
not its visible text.
|
|
727
|
-
|
|
728
|
-
Returns:
|
|
729
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
730
|
-
- "result" (str): Confirmation of the action.
|
|
731
|
-
- "snapshot" (str): A snapshot of the page after the
|
|
732
|
-
selection.
|
|
733
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
734
|
-
- "current_tab" (int): Index of the active tab.
|
|
735
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
736
|
-
"""
|
|
737
|
-
try:
|
|
738
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
739
|
-
result = await ws_wrapper.select(ref, value)
|
|
740
|
-
|
|
741
|
-
# Add tab information
|
|
742
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
743
|
-
result.update(
|
|
744
|
-
{
|
|
745
|
-
"tabs": tab_info,
|
|
746
|
-
"current_tab": next(
|
|
747
|
-
(
|
|
748
|
-
i
|
|
749
|
-
for i, tab in enumerate(tab_info)
|
|
750
|
-
if tab.get("is_current")
|
|
751
|
-
),
|
|
752
|
-
0,
|
|
753
|
-
),
|
|
754
|
-
"total_tabs": len(tab_info),
|
|
755
|
-
}
|
|
756
|
-
)
|
|
757
|
-
|
|
758
|
-
return result
|
|
759
|
-
except Exception as e:
|
|
760
|
-
logger.error(f"Failed to select option: {e}")
|
|
761
|
-
return {
|
|
762
|
-
"result": f"Error selecting option: {e}",
|
|
763
|
-
"snapshot": "",
|
|
764
|
-
"tabs": [],
|
|
765
|
-
"current_tab": 0,
|
|
766
|
-
"total_tabs": 0,
|
|
767
|
-
}
|
|
768
|
-
|
|
769
|
-
async def browser_scroll(
|
|
770
|
-
self, *, direction: str, amount: int = 500
|
|
771
|
-
) -> Dict[str, Any]:
|
|
772
|
-
r"""Scrolls the current page window.
|
|
773
|
-
|
|
774
|
-
Args:
|
|
775
|
-
direction (str): The direction to scroll: 'up' or 'down'.
|
|
776
|
-
amount (int): The number of pixels to scroll, default is 500.
|
|
777
|
-
|
|
778
|
-
Returns:
|
|
779
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
780
|
-
- "result" (str): Confirmation of the action.
|
|
781
|
-
- "snapshot" (str): A snapshot of the page after scrolling.
|
|
782
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
783
|
-
- "current_tab" (int): Index of the active tab.
|
|
784
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
785
|
-
"""
|
|
786
|
-
try:
|
|
787
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
788
|
-
result = await ws_wrapper.scroll(direction, amount)
|
|
789
|
-
|
|
790
|
-
# Add tab information
|
|
791
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
792
|
-
result.update(
|
|
793
|
-
{
|
|
794
|
-
"tabs": tab_info,
|
|
795
|
-
"current_tab": next(
|
|
796
|
-
(
|
|
797
|
-
i
|
|
798
|
-
for i, tab in enumerate(tab_info)
|
|
799
|
-
if tab.get("is_current")
|
|
800
|
-
),
|
|
801
|
-
0,
|
|
802
|
-
),
|
|
803
|
-
"total_tabs": len(tab_info),
|
|
804
|
-
}
|
|
805
|
-
)
|
|
806
|
-
|
|
807
|
-
return result
|
|
808
|
-
except Exception as e:
|
|
809
|
-
logger.error(f"Failed to scroll: {e}")
|
|
810
|
-
return {
|
|
811
|
-
"result": f"Error scrolling: {e}",
|
|
812
|
-
"snapshot": "",
|
|
813
|
-
"tabs": [],
|
|
814
|
-
"current_tab": 0,
|
|
815
|
-
"total_tabs": 0,
|
|
816
|
-
}
|
|
817
|
-
|
|
818
|
-
async def browser_enter(self) -> Dict[str, Any]:
|
|
819
|
-
r"""Simulates pressing the Enter key on the currently focused
|
|
820
|
-
element.
|
|
821
|
-
|
|
822
|
-
This is useful for submitting forms or search queries after using the
|
|
823
|
-
`type` tool.
|
|
824
|
-
|
|
825
|
-
Returns:
|
|
826
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
827
|
-
- "result" (str): Confirmation of the action.
|
|
828
|
-
- "snapshot" (str): A new page snapshot, as this action often
|
|
829
|
-
triggers navigation.
|
|
830
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
831
|
-
- "current_tab" (int): Index of the active tab.
|
|
832
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
833
|
-
"""
|
|
834
|
-
try:
|
|
835
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
836
|
-
result = await ws_wrapper.enter()
|
|
837
|
-
|
|
838
|
-
# Add tab information
|
|
839
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
840
|
-
result.update(
|
|
841
|
-
{
|
|
842
|
-
"tabs": tab_info,
|
|
843
|
-
"current_tab": next(
|
|
844
|
-
(
|
|
845
|
-
i
|
|
846
|
-
for i, tab in enumerate(tab_info)
|
|
847
|
-
if tab.get("is_current")
|
|
848
|
-
),
|
|
849
|
-
0,
|
|
850
|
-
),
|
|
851
|
-
"total_tabs": len(tab_info),
|
|
852
|
-
}
|
|
853
|
-
)
|
|
854
|
-
|
|
855
|
-
return result
|
|
856
|
-
except Exception as e:
|
|
857
|
-
logger.error(f"Failed to press enter: {e}")
|
|
858
|
-
return {
|
|
859
|
-
"result": f"Error pressing enter: {e}",
|
|
860
|
-
"snapshot": "",
|
|
861
|
-
"tabs": [],
|
|
862
|
-
"current_tab": 0,
|
|
863
|
-
"total_tabs": 0,
|
|
864
|
-
}
|
|
865
|
-
|
|
866
|
-
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
867
|
-
r"""Switches to a different browser tab using its ID.
|
|
868
|
-
|
|
869
|
-
After switching, all actions will apply to the new tab. Use
|
|
870
|
-
`get_tab_info` to find the ID of the tab you want to switch to.
|
|
871
|
-
|
|
872
|
-
Args:
|
|
873
|
-
tab_id (str): The ID of the tab to activate.
|
|
874
|
-
|
|
875
|
-
Returns:
|
|
876
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
877
|
-
- "result" (str): Confirmation of the action.
|
|
878
|
-
- "snapshot" (str): A snapshot of the newly active tab.
|
|
879
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
880
|
-
- "current_tab" (int): Index of the new active tab.
|
|
881
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
882
|
-
"""
|
|
883
|
-
try:
|
|
884
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
885
|
-
result = await ws_wrapper.switch_tab(tab_id)
|
|
886
|
-
|
|
887
|
-
# Add tab information
|
|
888
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
889
|
-
result.update(
|
|
890
|
-
{
|
|
891
|
-
"tabs": tab_info,
|
|
892
|
-
"current_tab": next(
|
|
893
|
-
(
|
|
894
|
-
i
|
|
895
|
-
for i, tab in enumerate(tab_info)
|
|
896
|
-
if tab.get("is_current")
|
|
897
|
-
),
|
|
898
|
-
0,
|
|
899
|
-
),
|
|
900
|
-
"total_tabs": len(tab_info),
|
|
901
|
-
}
|
|
902
|
-
)
|
|
903
|
-
|
|
904
|
-
return result
|
|
905
|
-
except Exception as e:
|
|
906
|
-
logger.error(f"Failed to switch tab: {e}")
|
|
907
|
-
return {
|
|
908
|
-
"result": f"Error switching tab: {e}",
|
|
909
|
-
"snapshot": "",
|
|
910
|
-
"tabs": [],
|
|
911
|
-
"current_tab": 0,
|
|
912
|
-
"total_tabs": 0,
|
|
913
|
-
}
|
|
914
|
-
|
|
915
|
-
async def browser_close_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
916
|
-
r"""Closes a browser tab using its ID.
|
|
917
|
-
|
|
918
|
-
Use `get_tab_info` to find the ID of the tab to close. After
|
|
919
|
-
closing, the browser will switch to another tab if available.
|
|
920
|
-
|
|
921
|
-
Args:
|
|
922
|
-
tab_id (str): The ID of the tab to close.
|
|
923
|
-
|
|
924
|
-
Returns:
|
|
925
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
926
|
-
- "result" (str): Confirmation of the action.
|
|
927
|
-
- "snapshot" (str): A snapshot of the active tab after
|
|
928
|
-
closure.
|
|
929
|
-
- "tabs" (List[Dict]): Information about remaining tabs.
|
|
930
|
-
- "current_tab" (int): Index of the new active tab.
|
|
931
|
-
- "total_tabs" (int): Total number of remaining tabs.
|
|
932
|
-
"""
|
|
933
|
-
try:
|
|
934
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
935
|
-
result = await ws_wrapper.close_tab(tab_id)
|
|
936
|
-
|
|
937
|
-
# Add tab information
|
|
938
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
939
|
-
result.update(
|
|
940
|
-
{
|
|
941
|
-
"tabs": tab_info,
|
|
942
|
-
"current_tab": next(
|
|
943
|
-
(
|
|
944
|
-
i
|
|
945
|
-
for i, tab in enumerate(tab_info)
|
|
946
|
-
if tab.get("is_current")
|
|
947
|
-
),
|
|
948
|
-
0,
|
|
949
|
-
),
|
|
950
|
-
"total_tabs": len(tab_info),
|
|
951
|
-
}
|
|
952
|
-
)
|
|
953
|
-
|
|
954
|
-
return result
|
|
955
|
-
except Exception as e:
|
|
956
|
-
logger.error(f"Failed to close tab: {e}")
|
|
957
|
-
return {
|
|
958
|
-
"result": f"Error closing tab: {e}",
|
|
959
|
-
"snapshot": "",
|
|
960
|
-
"tabs": [],
|
|
961
|
-
"current_tab": 0,
|
|
962
|
-
"total_tabs": 0,
|
|
963
|
-
}
|
|
145
|
+
# Note: Python implementation doesn't support viewport_limit
|
|
146
|
+
if viewport_limit:
|
|
147
|
+
import warnings
|
|
964
148
|
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
tab is currently active. Use this to manage multiple tabs.
|
|
970
|
-
|
|
971
|
-
Returns:
|
|
972
|
-
Dict[str, Any]: A dictionary with tab information:
|
|
973
|
-
- "tabs" (List[Dict]): A list of open tabs, each with:
|
|
974
|
-
- "index" (int): The tab's zero-based index.
|
|
975
|
-
- "title" (str): The page title.
|
|
976
|
-
- "url" (str): The current URL.
|
|
977
|
-
- "is_current" (bool): True if the tab is active.
|
|
978
|
-
- "current_tab" (int): Index of the active tab.
|
|
979
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
980
|
-
"""
|
|
981
|
-
try:
|
|
982
|
-
ws_wrapper = await self._get_ws_wrapper()
|
|
983
|
-
tab_info = await ws_wrapper.get_tab_info()
|
|
984
|
-
|
|
985
|
-
return {
|
|
986
|
-
"tabs": tab_info,
|
|
987
|
-
"current_tab": next(
|
|
988
|
-
(
|
|
989
|
-
i
|
|
990
|
-
for i, tab in enumerate(tab_info)
|
|
991
|
-
if tab.get("is_current")
|
|
992
|
-
),
|
|
993
|
-
0,
|
|
994
|
-
),
|
|
995
|
-
"total_tabs": len(tab_info),
|
|
996
|
-
}
|
|
997
|
-
except Exception as e:
|
|
998
|
-
logger.error(f"Failed to get tab info: {e}")
|
|
999
|
-
return {
|
|
1000
|
-
"tabs": [],
|
|
1001
|
-
"current_tab": 0,
|
|
1002
|
-
"total_tabs": 0,
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
# Additional methods for backward compatibility
|
|
1006
|
-
async def browser_wait_user(
|
|
1007
|
-
self, timeout_sec: Optional[float] = None
|
|
1008
|
-
) -> Dict[str, Any]:
|
|
1009
|
-
r"""Pauses execution and waits for human input from the console.
|
|
1010
|
-
|
|
1011
|
-
Use this for tasks requiring manual steps, like solving a CAPTCHA.
|
|
1012
|
-
The
|
|
1013
|
-
agent will resume after the user presses Enter in the console.
|
|
1014
|
-
|
|
1015
|
-
Args:
|
|
1016
|
-
timeout_sec (Optional[float]): Max time to wait in seconds. If
|
|
1017
|
-
`None`, it will wait indefinitely.
|
|
1018
|
-
|
|
1019
|
-
Returns:
|
|
1020
|
-
Dict[str, Any]: A dictionary with the result of the action:
|
|
1021
|
-
- "result" (str): A message indicating how the wait ended.
|
|
1022
|
-
- "snapshot" (str): The page snapshot after the wait.
|
|
1023
|
-
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1024
|
-
- "current_tab" (int): Index of the active tab.
|
|
1025
|
-
- "total_tabs" (int): Total number of open tabs.
|
|
1026
|
-
"""
|
|
1027
|
-
import asyncio
|
|
1028
|
-
|
|
1029
|
-
prompt = (
|
|
1030
|
-
"🕑 Agent waiting for human input. "
|
|
1031
|
-
"Complete action in browser, then press Enter..."
|
|
1032
|
-
)
|
|
1033
|
-
logger.info(f"\n{prompt}\n")
|
|
1034
|
-
|
|
1035
|
-
async def _await_enter():
|
|
1036
|
-
try:
|
|
1037
|
-
await asyncio.to_thread(
|
|
1038
|
-
input, ">>> Press Enter to resume <<<\n"
|
|
149
|
+
warnings.warn(
|
|
150
|
+
"viewport_limit is not supported "
|
|
151
|
+
"in Python mode and will be ignored",
|
|
152
|
+
UserWarning,
|
|
1039
153
|
)
|
|
1040
|
-
except (asyncio.CancelledError, Exception):
|
|
1041
|
-
# Handle cancellation gracefully
|
|
1042
|
-
pass
|
|
1043
154
|
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
start_time = time.time()
|
|
1067
|
-
await _await_enter()
|
|
1068
|
-
wait_time = time.time() - start_time
|
|
1069
|
-
logger.info(f"User input received after {wait_time:.2f}s")
|
|
1070
|
-
result_msg = "User resumed."
|
|
1071
|
-
except asyncio.TimeoutError:
|
|
1072
|
-
wait_time = timeout_sec or 0.0
|
|
1073
|
-
logger.info(
|
|
1074
|
-
f"User input timeout reached after {wait_time}s, "
|
|
1075
|
-
f"auto-resuming"
|
|
155
|
+
return PyToolkit(
|
|
156
|
+
headless=headless,
|
|
157
|
+
user_data_dir=user_data_dir,
|
|
158
|
+
stealth=stealth,
|
|
159
|
+
web_agent_model=web_agent_model,
|
|
160
|
+
cache_dir=cache_dir,
|
|
161
|
+
enabled_tools=enabled_tools,
|
|
162
|
+
browser_log_to_file=browser_log_to_file,
|
|
163
|
+
session_id=session_id,
|
|
164
|
+
default_start_url=default_start_url,
|
|
165
|
+
default_timeout=default_timeout,
|
|
166
|
+
short_timeout=short_timeout,
|
|
167
|
+
navigation_timeout=navigation_timeout,
|
|
168
|
+
network_idle_timeout=network_idle_timeout,
|
|
169
|
+
screenshot_timeout=screenshot_timeout,
|
|
170
|
+
page_stability_timeout=page_stability_timeout,
|
|
171
|
+
dom_content_loaded_timeout=dom_content_loaded_timeout,
|
|
172
|
+
**kwargs,
|
|
173
|
+
)
|
|
174
|
+
else:
|
|
175
|
+
raise ValueError(
|
|
176
|
+
f"Invalid mode: {mode}. Must be 'typescript' or 'python'."
|
|
1076
177
|
)
|
|
1077
|
-
result_msg = f"Timeout {timeout_sec}s reached, auto-resumed."
|
|
1078
|
-
|
|
1079
|
-
try:
|
|
1080
|
-
snapshot = await self.browser_get_page_snapshot()
|
|
1081
|
-
tab_info = await self.browser_get_tab_info()
|
|
1082
|
-
return {"result": result_msg, "snapshot": snapshot, **tab_info}
|
|
1083
|
-
except Exception as e:
|
|
1084
|
-
logger.warning(f"Failed to get snapshot after wait: {e}")
|
|
1085
|
-
return {
|
|
1086
|
-
"result": result_msg,
|
|
1087
|
-
"snapshot": "",
|
|
1088
|
-
"tabs": [],
|
|
1089
|
-
"current_tab": 0,
|
|
1090
|
-
"total_tabs": 0,
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
def clone_for_new_session(
|
|
1094
|
-
self, new_session_id: Optional[str] = None
|
|
1095
|
-
) -> "HybridBrowserToolkit":
|
|
1096
|
-
r"""Create a new instance of HybridBrowserToolkit with a unique
|
|
1097
|
-
session.
|
|
1098
|
-
|
|
1099
|
-
Args:
|
|
1100
|
-
new_session_id: Optional new session ID. If None, a UUID will be
|
|
1101
|
-
generated.
|
|
1102
|
-
|
|
1103
|
-
Returns:
|
|
1104
|
-
A new HybridBrowserToolkit instance with the same configuration
|
|
1105
|
-
but a different session.
|
|
1106
|
-
"""
|
|
1107
|
-
import uuid
|
|
1108
|
-
|
|
1109
|
-
if new_session_id is None:
|
|
1110
|
-
new_session_id = str(uuid.uuid4())[:8]
|
|
1111
|
-
|
|
1112
|
-
return HybridBrowserToolkit(
|
|
1113
|
-
headless=self._headless,
|
|
1114
|
-
user_data_dir=self._user_data_dir,
|
|
1115
|
-
stealth=self._stealth,
|
|
1116
|
-
web_agent_model=self._web_agent_model,
|
|
1117
|
-
cache_dir=f"{self._cache_dir.rstrip('/')}_clone_"
|
|
1118
|
-
f"{new_session_id}/",
|
|
1119
|
-
enabled_tools=self.enabled_tools.copy(),
|
|
1120
|
-
browser_log_to_file=self._browser_log_to_file,
|
|
1121
|
-
session_id=new_session_id,
|
|
1122
|
-
default_start_url=self._default_start_url,
|
|
1123
|
-
default_timeout=self._default_timeout,
|
|
1124
|
-
short_timeout=self._short_timeout,
|
|
1125
|
-
navigation_timeout=self._navigation_timeout,
|
|
1126
|
-
network_idle_timeout=self._network_idle_timeout,
|
|
1127
|
-
screenshot_timeout=self._screenshot_timeout,
|
|
1128
|
-
page_stability_timeout=self._page_stability_timeout,
|
|
1129
|
-
dom_content_loaded_timeout=self._dom_content_loaded_timeout,
|
|
1130
|
-
)
|
|
1131
|
-
|
|
1132
|
-
def get_tools(self) -> List[FunctionTool]:
|
|
1133
|
-
r"""Get available function tools based
|
|
1134
|
-
on enabled_tools configuration."""
|
|
1135
|
-
# Map tool names to their corresponding methods
|
|
1136
|
-
tool_map = {
|
|
1137
|
-
"browser_open": self.browser_open,
|
|
1138
|
-
"browser_close": self.browser_close,
|
|
1139
|
-
"browser_visit_page": self.browser_visit_page,
|
|
1140
|
-
"browser_back": self.browser_back,
|
|
1141
|
-
"browser_forward": self.browser_forward,
|
|
1142
|
-
"browser_get_page_snapshot": self.browser_get_page_snapshot,
|
|
1143
|
-
"browser_get_som_screenshot": self.browser_get_som_screenshot,
|
|
1144
|
-
"browser_click": self.browser_click,
|
|
1145
|
-
"browser_type": self.browser_type,
|
|
1146
|
-
"browser_select": self.browser_select,
|
|
1147
|
-
"browser_scroll": self.browser_scroll,
|
|
1148
|
-
"browser_enter": self.browser_enter,
|
|
1149
|
-
"browser_wait_user": self.browser_wait_user,
|
|
1150
|
-
"browser_switch_tab": self.browser_switch_tab,
|
|
1151
|
-
"browser_close_tab": self.browser_close_tab,
|
|
1152
|
-
"browser_get_tab_info": self.browser_get_tab_info,
|
|
1153
|
-
}
|
|
1154
|
-
|
|
1155
|
-
enabled_tools = []
|
|
1156
|
-
|
|
1157
|
-
for tool_name in self.enabled_tools:
|
|
1158
|
-
if (
|
|
1159
|
-
tool_name == "browser_solve_task"
|
|
1160
|
-
and self._web_agent_model is None
|
|
1161
|
-
):
|
|
1162
|
-
logger.warning(
|
|
1163
|
-
f"Tool '{tool_name}' is enabled but web_agent_model "
|
|
1164
|
-
f"is not provided. Skipping this tool."
|
|
1165
|
-
)
|
|
1166
|
-
continue
|
|
1167
|
-
|
|
1168
|
-
if tool_name in tool_map:
|
|
1169
|
-
tool = FunctionTool(
|
|
1170
|
-
cast(Callable[..., Any], tool_map[tool_name])
|
|
1171
|
-
)
|
|
1172
|
-
enabled_tools.append(tool)
|
|
1173
|
-
else:
|
|
1174
|
-
logger.warning(f"Unknown tool name: {tool_name}")
|
|
1175
|
-
|
|
1176
|
-
logger.info(f"Returning {len(enabled_tools)} enabled tools")
|
|
1177
|
-
return enabled_tools
|