camel-ai 0.2.73a12__py3-none-any.whl → 0.2.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/models/anthropic_model.py +5 -3
- camel/societies/workforce/prompts.py +3 -19
- camel/societies/workforce/workforce.py +13 -8
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/note_taking_toolkit.py +3 -4
- camel/toolkits/search_toolkit.py +192 -59
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/types/enums.py +3 -0
- camel/utils/token_counting.py +13 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/METADATA +3 -2
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/RECORD +28 -28
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a12.dist-info → camel_ai-0.2.74.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
+
from collections import deque
|
|
17
18
|
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Tuple
|
|
18
19
|
|
|
19
20
|
from camel.logger import get_logger
|
|
@@ -26,6 +27,7 @@ if TYPE_CHECKING:
|
|
|
26
27
|
from playwright.async_api import (
|
|
27
28
|
Browser,
|
|
28
29
|
BrowserContext,
|
|
30
|
+
ConsoleMessage,
|
|
29
31
|
Page,
|
|
30
32
|
Playwright,
|
|
31
33
|
)
|
|
@@ -188,7 +190,9 @@ class HybridBrowserSession:
|
|
|
188
190
|
|
|
189
191
|
# Dictionary-based tab management with monotonic IDs
|
|
190
192
|
self._pages: Dict[str, Page] = {} # tab_id -> Page object
|
|
193
|
+
self._console_logs: Dict[str, Any] = {} # tab_id -> page logs
|
|
191
194
|
self._current_tab_id: Optional[str] = None # Current active tab ID
|
|
195
|
+
self.log_limit: int = ConfigLoader.get_max_log_limit() or 1000
|
|
192
196
|
|
|
193
197
|
self.snapshot: Optional[PageSnapshot] = None
|
|
194
198
|
self.executor: Optional[ActionExecutor] = None
|
|
@@ -266,7 +270,7 @@ class HybridBrowserSession:
|
|
|
266
270
|
)
|
|
267
271
|
|
|
268
272
|
# Store in pages dictionary
|
|
269
|
-
self.
|
|
273
|
+
await self._register_new_page(tab_id, new_page)
|
|
270
274
|
|
|
271
275
|
# Navigate if URL provided
|
|
272
276
|
if url:
|
|
@@ -281,6 +285,32 @@ class HybridBrowserSession:
|
|
|
281
285
|
)
|
|
282
286
|
return tab_id
|
|
283
287
|
|
|
288
|
+
async def _register_new_page(self, tab_id: str, new_page: "Page") -> None:
|
|
289
|
+
r"""Register a page and add console event listerers.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
new_page (Page): The new page object to register.
|
|
293
|
+
"""
|
|
294
|
+
# Add new page
|
|
295
|
+
self._pages[tab_id] = new_page
|
|
296
|
+
# Create log for the page
|
|
297
|
+
self._console_logs[tab_id] = deque(maxlen=self.log_limit)
|
|
298
|
+
|
|
299
|
+
# Add event function
|
|
300
|
+
def handle_console_log(msg: ConsoleMessage):
|
|
301
|
+
logs = self._console_logs.get(tab_id)
|
|
302
|
+
if logs is not None:
|
|
303
|
+
logs.append({"type": msg.type, "text": msg.text})
|
|
304
|
+
|
|
305
|
+
# Add event listener for console logs
|
|
306
|
+
new_page.on(event="console", f=handle_console_log)
|
|
307
|
+
|
|
308
|
+
def handle_page_close(page: "Page"):
|
|
309
|
+
self._console_logs.pop(tab_id, None)
|
|
310
|
+
|
|
311
|
+
# Add event listener for cleanup
|
|
312
|
+
new_page.on(event="close", f=handle_page_close)
|
|
313
|
+
|
|
284
314
|
async def register_page(self, new_page: "Page") -> str:
|
|
285
315
|
r"""Register a page that was created externally (e.g., by a click).
|
|
286
316
|
|
|
@@ -297,7 +327,7 @@ class HybridBrowserSession:
|
|
|
297
327
|
|
|
298
328
|
# Create new ID for the page
|
|
299
329
|
tab_id = await TabIdGenerator.generate_tab_id()
|
|
300
|
-
self.
|
|
330
|
+
await self._register_new_page(tab_id, new_page)
|
|
301
331
|
|
|
302
332
|
logger.info(
|
|
303
333
|
f"Registered new tab {tab_id} (opened by user action). "
|
|
@@ -458,6 +488,7 @@ class HybridBrowserSession:
|
|
|
458
488
|
self._context = singleton_instance._context
|
|
459
489
|
self._page = singleton_instance._page
|
|
460
490
|
self._pages = singleton_instance._pages
|
|
491
|
+
self._console_logs = singleton_instance._console_logs
|
|
461
492
|
self._current_tab_id = singleton_instance._current_tab_id
|
|
462
493
|
self.snapshot = singleton_instance.snapshot
|
|
463
494
|
self.executor = singleton_instance.executor
|
|
@@ -502,16 +533,16 @@ class HybridBrowserSession:
|
|
|
502
533
|
self._page = pages[0]
|
|
503
534
|
# Create ID for initial page
|
|
504
535
|
initial_tab_id = await TabIdGenerator.generate_tab_id()
|
|
505
|
-
self.
|
|
536
|
+
await self._register_new_page(initial_tab_id, pages[0])
|
|
506
537
|
self._current_tab_id = initial_tab_id
|
|
507
538
|
# Handle additional pages if any
|
|
508
539
|
for page in pages[1:]:
|
|
509
540
|
tab_id = await TabIdGenerator.generate_tab_id()
|
|
510
|
-
self.
|
|
541
|
+
await self._register_new_page(tab_id, page)
|
|
511
542
|
else:
|
|
512
543
|
self._page = await context.new_page()
|
|
513
544
|
initial_tab_id = await TabIdGenerator.generate_tab_id()
|
|
514
|
-
self.
|
|
545
|
+
await self._register_new_page(initial_tab_id, self._page)
|
|
515
546
|
self._current_tab_id = initial_tab_id
|
|
516
547
|
else:
|
|
517
548
|
self._browser = await self._playwright.chromium.launch(
|
|
@@ -522,7 +553,7 @@ class HybridBrowserSession:
|
|
|
522
553
|
|
|
523
554
|
# Create ID for initial page
|
|
524
555
|
initial_tab_id = await TabIdGenerator.generate_tab_id()
|
|
525
|
-
self.
|
|
556
|
+
await self._register_new_page(initial_tab_id, self._page)
|
|
526
557
|
self._current_tab_id = initial_tab_id
|
|
527
558
|
|
|
528
559
|
# Apply stealth modifications if enabled
|
|
@@ -713,13 +744,19 @@ class HybridBrowserSession:
|
|
|
713
744
|
return f"Navigated to {url}"
|
|
714
745
|
|
|
715
746
|
async def get_snapshot(
    self,
    *,
    force_refresh: bool = False,
    diff_only: bool = False,
    viewport_limit: bool = False,
) -> str:
    r"""Get snapshot for current tab.

    Args:
        force_refresh (bool): Forwarded unchanged to the snapshot capture
            call. Defaults to `False`.
        diff_only (bool): Forwarded unchanged to the snapshot capture
            call. Defaults to `False`.
        viewport_limit (bool): Forwarded unchanged to the snapshot capture
            call. Defaults to `False`.

    Returns:
        str: The captured snapshot, or `"<empty>"` when no snapshot
            helper is set on the session.
    """
    snapshotter = self.snapshot
    if snapshotter:
        captured = await snapshotter.capture(
            force_refresh=force_refresh,
            diff_only=diff_only,
            viewport_limit=viewport_limit,
        )
        return captured
    return "<empty>"
|
|
724
761
|
|
|
725
762
|
async def exec_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -738,3 +775,13 @@ class HybridBrowserSession:
|
|
|
738
775
|
if self._page is None:
|
|
739
776
|
raise RuntimeError("No active page available")
|
|
740
777
|
return self._page
|
|
778
|
+
|
|
779
|
+
async def get_console_logs(self) -> deque[Dict[str, Any]]:
    r"""Get the console log buffer of the currently active tab.

    Returns:
        deque[Dict[str, Any]]: The buffer stored for the current tab.
            Entries are dictionaries with the keys "type" and "text".
            The buffer itself is returned, not a copy.

    Raises:
        RuntimeError: If there is no active tab, or if no log buffer is
            stored for the active tab.
    """
    await self.ensure_browser()
    active_tab = self._current_tab_id
    if active_tab is None:
        raise RuntimeError("No active tab available")
    # `dict.get` already yields None for a missing key; the buffer is
    # absent when the tab was never registered or its page was closed.
    logs = self._console_logs.get(active_tab)
    if logs is None:
        raise RuntimeError("No active logs available for the page")
    return logs
|
|
@@ -40,6 +40,9 @@ class BrowserConfig:
|
|
|
40
40
|
# Default action limits
|
|
41
41
|
DEFAULT_MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
|
|
42
42
|
|
|
43
|
+
# Default config limits
|
|
44
|
+
DEFAULT_MAX_LOG_LIMIT = 1000
|
|
45
|
+
|
|
43
46
|
@staticmethod
|
|
44
47
|
def get_timeout_config() -> Dict[str, int]:
|
|
45
48
|
r"""Get timeout configuration with environment variable support.
|
|
@@ -108,6 +111,22 @@ class BrowserConfig:
|
|
|
108
111
|
),
|
|
109
112
|
}
|
|
110
113
|
|
|
114
|
+
@staticmethod
def get_log_limits() -> Dict[str, int]:
    r"""Build the console log limit settings, honouring the environment.

    The value is read from the `HYBRID_BROWSER_MAX_LOG_LIMIT` environment
    variable; when it is unset, `BrowserConfig.DEFAULT_MAX_LOG_LIMIT` is
    used instead.

    Returns:
        Dict[str, int]: A dictionary with the single key 'max_log_limit'.
    """
    raw_limit = os.getenv(
        'HYBRID_BROWSER_MAX_LOG_LIMIT',
        BrowserConfig.DEFAULT_MAX_LOG_LIMIT,
    )
    limits: Dict[str, int] = {}
    # The environment delivers a string, so normalise to int here.
    limits['max_log_limit'] = int(raw_limit)
    return limits
|
|
129
|
+
|
|
111
130
|
@staticmethod
|
|
112
131
|
def get_action_timeout(override: Optional[int] = None) -> int:
|
|
113
132
|
r"""Get action timeout with optional override.
|
|
@@ -178,6 +197,20 @@ class BrowserConfig:
|
|
|
178
197
|
return override
|
|
179
198
|
return BrowserConfig.get_action_limits()['max_scroll_amount']
|
|
180
199
|
|
|
200
|
+
@staticmethod
|
|
201
|
+
def get_max_log_limit(override: Optional[int] = None) -> int:
|
|
202
|
+
r"""Get maximum log limit with optional override.
|
|
203
|
+
|
|
204
|
+
Args:
|
|
205
|
+
override: Optional log limit override value.
|
|
206
|
+
|
|
207
|
+
Returns:
|
|
208
|
+
int: Maximum log limit.
|
|
209
|
+
"""
|
|
210
|
+
if override is not None:
|
|
211
|
+
return override
|
|
212
|
+
return BrowserConfig.get_log_limits()['max_log_limit']
|
|
213
|
+
|
|
181
214
|
@staticmethod
|
|
182
215
|
def get_screenshot_timeout(override: Optional[int] = None) -> int:
|
|
183
216
|
r"""Get screenshot timeout with optional override.
|
|
@@ -370,6 +403,11 @@ class ConfigLoader:
|
|
|
370
403
|
r"""Get maximum scroll amount with optional override."""
|
|
371
404
|
return BrowserConfig.get_max_scroll_amount(override)
|
|
372
405
|
|
|
406
|
+
@classmethod
def get_max_log_limit(cls, override: Optional[int] = None) -> int:
    r"""Get maximum log limit with optional override.

    Delegates to `BrowserConfig.get_max_log_limit`.

    Args:
        override: Optional log limit override value.

    Returns:
        int: Maximum log limit.
    """
    limit = BrowserConfig.get_max_log_limit(override)
    return limit
|
|
410
|
+
|
|
373
411
|
@classmethod
|
|
374
412
|
def get_screenshot_timeout(cls, override: Optional[int] = None) -> int:
|
|
375
413
|
r"""Get screenshot timeout with optional override."""
|
|
@@ -432,6 +470,11 @@ def get_max_scroll_amount(override: Optional[int] = None) -> int:
|
|
|
432
470
|
return BrowserConfig.get_max_scroll_amount(override)
|
|
433
471
|
|
|
434
472
|
|
|
473
|
+
def get_max_log_limit(override: Optional[int] = None) -> int:
    r"""Get maximum log limit with optional override.

    Module-level shortcut that delegates to
    `BrowserConfig.get_max_log_limit`.

    Args:
        override: Optional log limit override value.

    Returns:
        int: Maximum log limit.
    """
    limit = BrowserConfig.get_max_log_limit(override)
    return limit
|
|
476
|
+
|
|
477
|
+
|
|
435
478
|
def get_screenshot_timeout(override: Optional[int] = None) -> int:
|
|
436
479
|
r"""Get screenshot timeout with optional override."""
|
|
437
480
|
return BrowserConfig.get_screenshot_timeout(override)
|
|
@@ -73,11 +73,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
73
73
|
"browser_select",
|
|
74
74
|
"browser_scroll",
|
|
75
75
|
"browser_enter",
|
|
76
|
+
"browser_mouse_control",
|
|
77
|
+
"browser_mouse_drag",
|
|
78
|
+
"browser_press_key",
|
|
76
79
|
"browser_wait_user",
|
|
77
80
|
"browser_solve_task",
|
|
78
81
|
"browser_switch_tab",
|
|
79
82
|
"browser_close_tab",
|
|
80
83
|
"browser_get_tab_info",
|
|
84
|
+
"browser_console_view",
|
|
85
|
+
"browser_console_exec",
|
|
81
86
|
]
|
|
82
87
|
|
|
83
88
|
def __init__(
|
|
@@ -99,6 +104,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
99
104
|
screenshot_timeout: Optional[int] = None,
|
|
100
105
|
page_stability_timeout: Optional[int] = None,
|
|
101
106
|
dom_content_loaded_timeout: Optional[int] = None,
|
|
107
|
+
viewport_limit: bool = False,
|
|
102
108
|
) -> None:
|
|
103
109
|
r"""Initialize the HybridBrowserToolkit.
|
|
104
110
|
|
|
@@ -182,6 +188,10 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
182
188
|
HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or defaults to
|
|
183
189
|
5000ms.
|
|
184
190
|
Defaults to `None`.
|
|
191
|
+
viewport_limit (bool): When True, only return snapshot results
|
|
192
|
+
visible in the current viewport. When False, return all
|
|
193
|
+
elements on the page regardless of visibility.
|
|
194
|
+
Defaults to `False`.
|
|
185
195
|
"""
|
|
186
196
|
super().__init__()
|
|
187
197
|
RegisteredAgentToolkit.__init__(self)
|
|
@@ -193,6 +203,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
193
203
|
self._browser_log_to_file = browser_log_to_file
|
|
194
204
|
self._default_start_url = default_start_url
|
|
195
205
|
self._session_id = session_id or "default"
|
|
206
|
+
self._viewport_limit = viewport_limit
|
|
196
207
|
|
|
197
208
|
# Store timeout configuration
|
|
198
209
|
self._default_timeout = default_timeout
|
|
@@ -309,7 +320,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
309
320
|
# Try to close browser with a timeout to prevent hanging
|
|
310
321
|
try:
|
|
311
322
|
loop.run_until_complete(
|
|
312
|
-
asyncio.wait_for(self.
|
|
323
|
+
asyncio.wait_for(self.browser_close(), timeout=2.0)
|
|
313
324
|
)
|
|
314
325
|
except asyncio.TimeoutError:
|
|
315
326
|
pass # Skip cleanup if it takes too long
|
|
@@ -550,7 +561,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
550
561
|
)
|
|
551
562
|
|
|
552
563
|
async def _get_unified_analysis(
|
|
553
|
-
self, max_retries: int = 3
|
|
564
|
+
self, max_retries: int = 3, viewport_limit: Optional[bool] = None
|
|
554
565
|
) -> Dict[str, Any]:
|
|
555
566
|
r"""Get unified analysis data from the page with retry mechanism for
|
|
556
567
|
navigation issues."""
|
|
@@ -573,7 +584,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
573
584
|
# Don't fail if DOM wait times out
|
|
574
585
|
pass
|
|
575
586
|
|
|
576
|
-
|
|
587
|
+
# Use instance viewport_limit if parameter not provided
|
|
588
|
+
use_viewport_limit = (
|
|
589
|
+
viewport_limit
|
|
590
|
+
if viewport_limit is not None
|
|
591
|
+
else self._viewport_limit
|
|
592
|
+
)
|
|
593
|
+
result = await page.evaluate(
|
|
594
|
+
self._unified_script, use_viewport_limit
|
|
595
|
+
)
|
|
577
596
|
|
|
578
597
|
if not isinstance(result, dict):
|
|
579
598
|
logger.warning(f"Invalid result type: {type(result)}")
|
|
@@ -1703,6 +1722,149 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1703
1722
|
|
|
1704
1723
|
return result
|
|
1705
1724
|
|
|
1725
|
+
@action_logger
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Control the mouse to interact with browser with x, y coordinates

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    if control not in ("click", "right_click", "dblclick"):
        tab_info = await self._get_tab_info_for_output()
        return {
            # The message lists every value the check above accepts, so
            # the caller is not told that 'right_click' is unsupported.
            "result": "Error: supported control actions are "
            "'click', 'right_click' or 'dblclick'",
            "snapshot": "",
            **tab_info,
        }

    action = {"type": "mouse_control", "control": control, "x": x, "y": y}

    result = await self._exec_with_snapshot(action)

    # Add tab information to the result
    tab_info = await self._get_tab_info_for_output()
    result.update(tab_info)

    return result
|
|
1763
|
+
|
|
1764
|
+
@action_logger
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Control the mouse to drag and drop in the browser using ref IDs.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    self._validate_ref(from_ref, "drag source")
    self._validate_ref(to_ref, "drag target")

    # Both refs must be present in the current page analysis before the
    # drag is attempted; the source is checked first.
    analysis = await self._get_unified_analysis()
    known_refs = analysis.get("elements", {})

    for role, ref in (("Source", from_ref), ("Target", to_ref)):
        if ref in known_refs:
            continue
        message = f"Error: {role} element reference '{ref}' not found."
        logger.error(message)
        snapshot = self._format_snapshot_from_analysis(analysis)
        tab_info = await self._get_tab_info_for_output()
        return {"result": message, "snapshot": snapshot, **tab_info}

    outcome = await self._exec_with_snapshot(
        {"type": "mouse_drag", "from_ref": from_ref, "to_ref": to_ref}
    )

    # Attach the tab overview to the action result.
    outcome.update(await self._get_tab_info_for_output())
    return outcome
|
|
1831
|
+
|
|
1832
|
+
@action_logger
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press key and key combinations.
    Supports single key press or combination of keys by concatenating
    them with '+' separator.

    Args:
        keys (List[str]): key or list of keys.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    is_str_list = isinstance(keys, list) and all(
        isinstance(key, str) for key in keys
    )
    if not is_str_list:
        # Report malformed input as a result payload instead of raising.
        tab_info = await self._get_tab_info_for_output()
        return {
            "result": "Error: Expected keys as a list of strings.",
            "snapshot": "",
            **tab_info,
        }

    outcome = await self._exec_with_snapshot(
        {"type": "press_key", "keys": keys}
    )

    # Attach the tab overview to the action result.
    outcome.update(await self._get_tab_info_for_output())
    return outcome
|
|
1867
|
+
|
|
1706
1868
|
@action_logger
|
|
1707
1869
|
async def browser_wait_user(
|
|
1708
1870
|
self, timeout_sec: Optional[float] = None
|
|
@@ -1830,6 +1992,148 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1830
1992
|
await agent.process_command(task_prompt, max_steps=max_steps)
|
|
1831
1993
|
return "Task processing finished - see stdout for detailed trace."
|
|
1832
1994
|
|
|
1995
|
+
@action_logger
async def browser_console_view(self) -> Dict[str, Any]:
    r"""View current page console logs.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - console_messages (List[Dict]) : collection of logs from the
            browser console
    """
    try:
        # Copy into a plain list so the output is JSON serializable.
        entries = list(await self._session.get_console_logs())
    except Exception as e:
        # Best effort: a failed lookup yields an empty log list.
        logger.warning(f"Failed to retrieve logs: {e}")
        return {"console_messages": []}
    return {"console_messages": entries}
|
|
2011
|
+
|
|
2012
|
+
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Execute javascript code in the console of the current page and get
    results.

    Args:
        code (str): JavaScript code for execution.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Result of the action.
            - "console_output" (List[str]): Console log outputs during
            execution.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    import asyncio
    import json

    page = await self._require_page()

    try:
        logger.info("Executing JavaScript code in browser console.")
        exec_start = time.time()

        # Encode the source as a JavaScript string literal. `json.dumps`
        # emits only escapes that JavaScript understands, whereas Python's
        # `repr` can produce ones it does not (for example `\U0001xxxx`).
        code_literal = json.dumps(code)

        # Wrap the code to capture console.log output and handle
        # expressions.
        # NOTE(security): this runs caller-supplied JavaScript in the page
        # by design; `code` must come from a trusted source.
        # NOTE(review): when the eval attempt throws for any reason, the
        # code is run again as statements, so side effects that happened
        # before the error may occur twice.
        wrapped_code = (
            """
            (function() {
                const _logs = [];
                const originalLog = console.log;
                console.log = function(...args) {
                    _logs.push(args.map(arg => {
                        try {
                            return typeof arg === 'object' ?
                                JSON.stringify(arg) : String(arg);
                        } catch (e) {
                            return String(arg);
                        }
                    }).join(' '));
                    originalLog.apply(console, args);
                };

                let result;
                try {
                    // First try to evaluate as an expression
                    // (like browser console)
                    result = eval("""
            + code_literal
            + """);
                } catch (e) {
                    // If that fails, execute as statements
                    try {
                        result = (function() { """
            + code
            + """ })();
                    } catch (error) {
                        console.log = originalLog;
                        throw error;
                    }
                }

                console.log = originalLog;
                return { result, logs: _logs };
            })()
            """
        )

        eval_result = await page.evaluate(wrapped_code)
        result = eval_result.get('result')
        console_logs = eval_result.get('logs', [])

        exec_time = time.time() - exec_start
        logger.info(f"Code execution completed in {exec_time:.2f}s.")

        # Short pause before the snapshot is taken.
        await asyncio.sleep(0.2)

        # Get snapshot
        logger.info("Capturing page snapshot after code execution.")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Code execution snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        # Properly serialize the result
        try:
            result_str = json.dumps(result, indent=2)
        except (TypeError, ValueError):
            result_str = str(result)

        return {
            "result": f"Code execution result: {result_str}",
            "console_output": console_logs,
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Code execution failed: {e}")
        # Get tab information for error case
        try:
            tab_info = await self._get_tab_info_for_output()
        except Exception:
            tab_info = {
                "tabs": [],
                "current_tab": 0,
                "total_tabs": 0,
            }

        return {
            "result": f"Code execution failed: {e}",
            "console_output": [],
            "snapshot": "",
            **tab_info,
        }
|
|
2136
|
+
|
|
1833
2137
|
def get_log_summary(self) -> Dict[str, Any]:
|
|
1834
2138
|
r"""Get a summary of logged actions."""
|
|
1835
2139
|
if not self.log_buffer:
|
|
@@ -2045,11 +2349,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
2045
2349
|
"browser_select": self.browser_select,
|
|
2046
2350
|
"browser_scroll": self.browser_scroll,
|
|
2047
2351
|
"browser_enter": self.browser_enter,
|
|
2352
|
+
"browser_mouse_control": self.browser_mouse_control,
|
|
2353
|
+
"browser_mouse_drag": self.browser_mouse_drag,
|
|
2354
|
+
"browser_press_key": self.browser_press_key,
|
|
2048
2355
|
"browser_wait_user": self.browser_wait_user,
|
|
2049
2356
|
"browser_solve_task": self.browser_solve_task,
|
|
2050
2357
|
"browser_switch_tab": self.browser_switch_tab,
|
|
2051
2358
|
"browser_close_tab": self.browser_close_tab,
|
|
2052
2359
|
"browser_get_tab_info": self.browser_get_tab_info,
|
|
2360
|
+
"browser_console_view": self.browser_console_view,
|
|
2361
|
+
"browser_console_exec": self.browser_console_exec,
|
|
2053
2362
|
}
|
|
2054
2363
|
|
|
2055
2364
|
enabled_tools = []
|
|
@@ -43,7 +43,11 @@ class PageSnapshot:
|
|
|
43
43
|
# Public API
|
|
44
44
|
# ---------------------------------------------------------------------
|
|
45
45
|
async def capture(
|
|
46
|
-
self,
|
|
46
|
+
self,
|
|
47
|
+
*,
|
|
48
|
+
force_refresh: bool = False,
|
|
49
|
+
diff_only: bool = False,
|
|
50
|
+
viewport_limit: bool = False,
|
|
47
51
|
) -> str:
|
|
48
52
|
"""Return current snapshot or just the diff to previous one."""
|
|
49
53
|
try:
|
|
@@ -65,7 +69,9 @@ class PageSnapshot:
|
|
|
65
69
|
)
|
|
66
70
|
|
|
67
71
|
logger.debug("Capturing page snapshot …")
|
|
68
|
-
snapshot_result = await self._get_snapshot_direct(
|
|
72
|
+
snapshot_result = await self._get_snapshot_direct(
|
|
73
|
+
viewport_limit=viewport_limit
|
|
74
|
+
)
|
|
69
75
|
|
|
70
76
|
# Extract snapshot text from the unified analyzer result
|
|
71
77
|
if (
|
|
@@ -111,7 +117,7 @@ class PageSnapshot:
|
|
|
111
117
|
_snapshot_js_cache: Optional[str] = None # class-level cache
|
|
112
118
|
|
|
113
119
|
async def _get_snapshot_direct(
|
|
114
|
-
self,
|
|
120
|
+
self, viewport_limit: bool = False
|
|
115
121
|
) -> Optional[Union[str, Dict[str, Any]]]:
|
|
116
122
|
r"""Evaluate the snapshot-extraction JS with simple retry logic.
|
|
117
123
|
|
|
@@ -133,7 +139,7 @@ class PageSnapshot:
|
|
|
133
139
|
retries: int = 3
|
|
134
140
|
while retries > 0:
|
|
135
141
|
try:
|
|
136
|
-
return await self.page.evaluate(js_code)
|
|
142
|
+
return await self.page.evaluate(js_code, viewport_limit)
|
|
137
143
|
except Exception as e:
|
|
138
144
|
msg = str(e)
|
|
139
145
|
|