camel-ai 0.2.73a11__py3-none-any.whl → 0.2.74__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (28)
  1. camel/__init__.py +1 -1
  2. camel/interpreters/e2b_interpreter.py +34 -1
  3. camel/models/anthropic_model.py +5 -3
  4. camel/societies/workforce/prompts.py +3 -19
  5. camel/societies/workforce/workforce.py +13 -8
  6. camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
  7. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
  8. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
  9. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
  10. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
  11. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
  12. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
  13. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
  14. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  15. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  16. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  17. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
  18. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  19. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  20. camel/toolkits/note_taking_toolkit.py +3 -4
  21. camel/toolkits/search_toolkit.py +192 -59
  22. camel/toolkits/terminal_toolkit.py +12 -2
  23. camel/types/enums.py +3 -0
  24. camel/utils/token_counting.py +13 -2
  25. {camel_ai-0.2.73a11.dist-info → camel_ai-0.2.74.dist-info}/METADATA +37 -4
  26. {camel_ai-0.2.73a11.dist-info → camel_ai-0.2.74.dist-info}/RECORD +28 -28
  27. {camel_ai-0.2.73a11.dist-info → camel_ai-0.2.74.dist-info}/WHEEL +0 -0
  28. {camel_ai-0.2.73a11.dist-info → camel_ai-0.2.74.dist-info}/licenses/LICENSE +0 -0
@@ -14,6 +14,7 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ from collections import deque
17
18
  from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Tuple
18
19
 
19
20
  from camel.logger import get_logger
@@ -26,6 +27,7 @@ if TYPE_CHECKING:
26
27
  from playwright.async_api import (
27
28
  Browser,
28
29
  BrowserContext,
30
+ ConsoleMessage,
29
31
  Page,
30
32
  Playwright,
31
33
  )
@@ -188,7 +190,9 @@ class HybridBrowserSession:
188
190
 
189
191
  # Dictionary-based tab management with monotonic IDs
190
192
  self._pages: Dict[str, Page] = {} # tab_id -> Page object
193
+ self._console_logs: Dict[str, Any] = {} # tab_id -> page logs
191
194
  self._current_tab_id: Optional[str] = None # Current active tab ID
195
+ self.log_limit: int = ConfigLoader.get_max_log_limit() or 1000
192
196
 
193
197
  self.snapshot: Optional[PageSnapshot] = None
194
198
  self.executor: Optional[ActionExecutor] = None
@@ -266,7 +270,7 @@ class HybridBrowserSession:
266
270
  )
267
271
 
268
272
  # Store in pages dictionary
269
- self._pages[tab_id] = new_page
273
+ await self._register_new_page(tab_id, new_page)
270
274
 
271
275
  # Navigate if URL provided
272
276
  if url:
@@ -281,6 +285,32 @@ class HybridBrowserSession:
281
285
  )
282
286
  return tab_id
283
287
 
288
async def _register_new_page(self, tab_id: str, new_page: "Page") -> None:
    r"""Register a page and add console event listeners.

    Stores the page under ``tab_id``, creates a bounded console-log buffer
    for it (at most ``self.log_limit`` entries; once full, the oldest
    entries are discarded first) and subscribes to the page's ``console``
    and ``close`` events.

    Args:
        tab_id (str): The tab ID under which the page and its console
            logs are stored.
        new_page (Page): The new page object to register.
    """
    # Add new page
    self._pages[tab_id] = new_page
    # Create log for the page
    self._console_logs[tab_id] = deque(maxlen=self.log_limit)

    # ``ConsoleMessage`` is only imported for type checking, so the
    # annotation is quoted (like ``"Page"``) to keep it a pure hint.
    def handle_console_log(msg: "ConsoleMessage") -> None:
        # The buffer is removed by the close handler below, so a late
        # console event for a closed page is silently ignored.
        logs = self._console_logs.get(tab_id)
        if logs is not None:
            logs.append({"type": msg.type, "text": msg.text})

    # Add event listener for console logs
    new_page.on(event="console", f=handle_console_log)

    def handle_page_close(page: "Page") -> None:
        # Drop the buffer so logs of closed tabs do not accumulate.
        self._console_logs.pop(tab_id, None)

    # Add event listener for cleanup
    new_page.on(event="close", f=handle_page_close)
313
+
284
314
  async def register_page(self, new_page: "Page") -> str:
285
315
  r"""Register a page that was created externally (e.g., by a click).
286
316
 
@@ -297,7 +327,7 @@ class HybridBrowserSession:
297
327
 
298
328
  # Create new ID for the page
299
329
  tab_id = await TabIdGenerator.generate_tab_id()
300
- self._pages[tab_id] = new_page
330
+ await self._register_new_page(tab_id, new_page)
301
331
 
302
332
  logger.info(
303
333
  f"Registered new tab {tab_id} (opened by user action). "
@@ -458,6 +488,7 @@ class HybridBrowserSession:
458
488
  self._context = singleton_instance._context
459
489
  self._page = singleton_instance._page
460
490
  self._pages = singleton_instance._pages
491
+ self._console_logs = singleton_instance._console_logs
461
492
  self._current_tab_id = singleton_instance._current_tab_id
462
493
  self.snapshot = singleton_instance.snapshot
463
494
  self.executor = singleton_instance.executor
@@ -502,16 +533,16 @@ class HybridBrowserSession:
502
533
  self._page = pages[0]
503
534
  # Create ID for initial page
504
535
  initial_tab_id = await TabIdGenerator.generate_tab_id()
505
- self._pages[initial_tab_id] = pages[0]
536
+ await self._register_new_page(initial_tab_id, pages[0])
506
537
  self._current_tab_id = initial_tab_id
507
538
  # Handle additional pages if any
508
539
  for page in pages[1:]:
509
540
  tab_id = await TabIdGenerator.generate_tab_id()
510
- self._pages[tab_id] = page
541
+ await self._register_new_page(tab_id, page)
511
542
  else:
512
543
  self._page = await context.new_page()
513
544
  initial_tab_id = await TabIdGenerator.generate_tab_id()
514
- self._pages[initial_tab_id] = self._page
545
+ await self._register_new_page(initial_tab_id, self._page)
515
546
  self._current_tab_id = initial_tab_id
516
547
  else:
517
548
  self._browser = await self._playwright.chromium.launch(
@@ -522,7 +553,7 @@ class HybridBrowserSession:
522
553
 
523
554
  # Create ID for initial page
524
555
  initial_tab_id = await TabIdGenerator.generate_tab_id()
525
- self._pages[initial_tab_id] = self._page
556
+ await self._register_new_page(initial_tab_id, self._page)
526
557
  self._current_tab_id = initial_tab_id
527
558
 
528
559
  # Apply stealth modifications if enabled
@@ -713,13 +744,19 @@ class HybridBrowserSession:
713
744
  return f"Navigated to {url}"
714
745
 
715
746
  async def get_snapshot(
716
- self, *, force_refresh: bool = False, diff_only: bool = False
747
+ self,
748
+ *,
749
+ force_refresh: bool = False,
750
+ diff_only: bool = False,
751
+ viewport_limit: bool = False,
717
752
  ) -> str:
718
753
  r"""Get snapshot for current tab."""
719
754
  if not self.snapshot:
720
755
  return "<empty>"
721
756
  return await self.snapshot.capture(
722
- force_refresh=force_refresh, diff_only=diff_only
757
+ force_refresh=force_refresh,
758
+ diff_only=diff_only,
759
+ viewport_limit=viewport_limit,
723
760
  )
724
761
 
725
762
  async def exec_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
@@ -738,3 +775,13 @@ class HybridBrowserSession:
738
775
  if self._page is None:
739
776
  raise RuntimeError("No active page available")
740
777
  return self._page
778
+
779
async def get_console_logs(self) -> "deque[Dict[str, Any]]":
    r"""Get the console logs recorded for the current active tab.

    Returns:
        deque[Dict[str, Any]]: The log buffer stored for the current tab.
            This is the stored object itself, not a copy; entries are
            dictionaries with ``"type"`` and ``"text"`` keys.

    Raises:
        RuntimeError: If there is no active tab, or no log buffer is
            stored for the active tab.
    """
    await self.ensure_browser()
    if self._current_tab_id is None:
        raise RuntimeError("No active tab available")
    logs = self._console_logs.get(self._current_tab_id)
    if logs is None:
        raise RuntimeError("No active logs available for the page")
    return logs
@@ -40,6 +40,9 @@ class BrowserConfig:
40
40
  # Default action limits
41
41
  DEFAULT_MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
42
42
 
43
+ # Default config limits
44
+ DEFAULT_MAX_LOG_LIMIT = 1000
45
+
43
46
  @staticmethod
44
47
  def get_timeout_config() -> Dict[str, int]:
45
48
  r"""Get timeout configuration with environment variable support.
@@ -108,6 +111,22 @@ class BrowserConfig:
108
111
  ),
109
112
  }
110
113
 
114
@staticmethod
def get_log_limits() -> Dict[str, int]:
    r"""Get console log limits with environment variable support.

    The limit is read from ``HYBRID_BROWSER_MAX_LOG_LIMIT`` and falls
    back to ``BrowserConfig.DEFAULT_MAX_LOG_LIMIT`` when the variable is
    not set. The value is converted with ``int``.

    Returns:
        Dict[str, int]: Mapping with a single ``'max_log_limit'`` entry.
    """
    raw_limit = os.getenv(
        'HYBRID_BROWSER_MAX_LOG_LIMIT',
        BrowserConfig.DEFAULT_MAX_LOG_LIMIT,
    )
    return {'max_log_limit': int(raw_limit)}
129
+
111
130
  @staticmethod
112
131
  def get_action_timeout(override: Optional[int] = None) -> int:
113
132
  r"""Get action timeout with optional override.
@@ -178,6 +197,20 @@ class BrowserConfig:
178
197
  return override
179
198
  return BrowserConfig.get_action_limits()['max_scroll_amount']
180
199
 
200
@staticmethod
def get_max_log_limit(override: Optional[int] = None) -> int:
    r"""Resolve the maximum number of console log entries to keep.

    Args:
        override: Optional log limit override value. When it is not
            ``None`` it is returned unchanged.

    Returns:
        int: The override if given, otherwise the ``'max_log_limit'``
            entry of ``BrowserConfig.get_log_limits()``.
    """
    if override is None:
        return BrowserConfig.get_log_limits()['max_log_limit']
    return override
213
+
181
214
  @staticmethod
182
215
  def get_screenshot_timeout(override: Optional[int] = None) -> int:
183
216
  r"""Get screenshot timeout with optional override.
@@ -370,6 +403,11 @@ class ConfigLoader:
370
403
  r"""Get maximum scroll amount with optional override."""
371
404
  return BrowserConfig.get_max_scroll_amount(override)
372
405
 
406
@classmethod
def get_max_log_limit(cls, override: Optional[int] = None) -> int:
    r"""Get maximum log limit, delegating to ``BrowserConfig``.

    Args:
        override: Optional log limit override value, forwarded as-is.

    Returns:
        int: The value returned by ``BrowserConfig.get_max_log_limit``.
    """
    max_log_limit = BrowserConfig.get_max_log_limit(override)
    return max_log_limit
410
+
373
411
  @classmethod
374
412
  def get_screenshot_timeout(cls, override: Optional[int] = None) -> int:
375
413
  r"""Get screenshot timeout with optional override."""
@@ -432,6 +470,11 @@ def get_max_scroll_amount(override: Optional[int] = None) -> int:
432
470
  return BrowserConfig.get_max_scroll_amount(override)
433
471
 
434
472
 
473
def get_max_log_limit(override: Optional[int] = None) -> int:
    r"""Module-level shortcut for ``BrowserConfig.get_max_log_limit``.

    Args:
        override: Optional log limit override value, forwarded as-is.

    Returns:
        int: The value returned by ``BrowserConfig.get_max_log_limit``.
    """
    max_log_limit = BrowserConfig.get_max_log_limit(override)
    return max_log_limit
476
+
477
+
435
478
  def get_screenshot_timeout(override: Optional[int] = None) -> int:
436
479
  r"""Get screenshot timeout with optional override."""
437
480
  return BrowserConfig.get_screenshot_timeout(override)
@@ -73,11 +73,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
73
73
  "browser_select",
74
74
  "browser_scroll",
75
75
  "browser_enter",
76
+ "browser_mouse_control",
77
+ "browser_mouse_drag",
78
+ "browser_press_key",
76
79
  "browser_wait_user",
77
80
  "browser_solve_task",
78
81
  "browser_switch_tab",
79
82
  "browser_close_tab",
80
83
  "browser_get_tab_info",
84
+ "browser_console_view",
85
+ "browser_console_exec",
81
86
  ]
82
87
 
83
88
  def __init__(
@@ -99,6 +104,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
99
104
  screenshot_timeout: Optional[int] = None,
100
105
  page_stability_timeout: Optional[int] = None,
101
106
  dom_content_loaded_timeout: Optional[int] = None,
107
+ viewport_limit: bool = False,
102
108
  ) -> None:
103
109
  r"""Initialize the HybridBrowserToolkit.
104
110
 
@@ -182,6 +188,10 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
182
188
  HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or defaults to
183
189
  5000ms.
184
190
  Defaults to `None`.
191
+ viewport_limit (bool): When True, only return snapshot results
192
+ visible in the current viewport. When False, return all
193
+ elements on the page regardless of visibility.
194
+ Defaults to `False`.
185
195
  """
186
196
  super().__init__()
187
197
  RegisteredAgentToolkit.__init__(self)
@@ -193,6 +203,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
193
203
  self._browser_log_to_file = browser_log_to_file
194
204
  self._default_start_url = default_start_url
195
205
  self._session_id = session_id or "default"
206
+ self._viewport_limit = viewport_limit
196
207
 
197
208
  # Store timeout configuration
198
209
  self._default_timeout = default_timeout
@@ -309,7 +320,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
309
320
  # Try to close browser with a timeout to prevent hanging
310
321
  try:
311
322
  loop.run_until_complete(
312
- asyncio.wait_for(self.close_browser(), timeout=2.0)
323
+ asyncio.wait_for(self.browser_close(), timeout=2.0)
313
324
  )
314
325
  except asyncio.TimeoutError:
315
326
  pass # Skip cleanup if it takes too long
@@ -550,7 +561,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
550
561
  )
551
562
 
552
563
  async def _get_unified_analysis(
553
- self, max_retries: int = 3
564
+ self, max_retries: int = 3, viewport_limit: Optional[bool] = None
554
565
  ) -> Dict[str, Any]:
555
566
  r"""Get unified analysis data from the page with retry mechanism for
556
567
  navigation issues."""
@@ -573,7 +584,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
573
584
  # Don't fail if DOM wait times out
574
585
  pass
575
586
 
576
- result = await page.evaluate(self._unified_script)
587
+ # Use instance viewport_limit if parameter not provided
588
+ use_viewport_limit = (
589
+ viewport_limit
590
+ if viewport_limit is not None
591
+ else self._viewport_limit
592
+ )
593
+ result = await page.evaluate(
594
+ self._unified_script, use_viewport_limit
595
+ )
577
596
 
578
597
  if not isinstance(result, dict):
579
598
  logger.warning(f"Invalid result type: {type(result)}")
@@ -1703,6 +1722,149 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1703
1722
 
1704
1723
  return result
1705
1724
 
1725
@action_logger
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Control the mouse to interact with the browser at x, y coordinates.

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when `control` is not supported.
            - "snapshot" (str): A new page snapshot (empty string when
              `control` is not supported).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    supported_controls = ("click", "right_click", "dblclick")
    if control not in supported_controls:
        tab_info = await self._get_tab_info_for_output()
        return {
            # The message lists every accepted value, including
            # 'right_click', so it matches the check above.
            "result": "Error: supported control actions are "
            "'click', 'right_click' or 'dblclick'",
            "snapshot": "",
            **tab_info,
        }

    action = {"type": "mouse_control", "control": control, "x": x, "y": y}

    result = await self._exec_with_snapshot(action)

    # Add tab information to the result
    tab_info = await self._get_tab_info_for_output()
    result.update(tab_info)

    return result
1763
+
1764
@action_logger
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Drag one element onto another in the browser using ref IDs.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when a ref is not found on the page.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    # Validate refs
    self._validate_ref(from_ref, "drag source")
    self._validate_ref(to_ref, "drag target")

    # Analyse the page to confirm both refs are present
    analysis = await self._get_unified_analysis()
    known_elements = analysis.get("elements", {})

    # The source is checked before the target, so a missing source is
    # always the one reported when both are absent.
    for role, ref in (("Source", from_ref), ("Target", to_ref)):
        if ref in known_elements:
            continue
        message = f"Error: {role} element reference '{ref}' not found."
        logger.error(message)
        page_snapshot = self._format_snapshot_from_analysis(analysis)
        tabs = await self._get_tab_info_for_output()
        return {
            "result": message,
            "snapshot": page_snapshot,
            **tabs,
        }

    drag_action = {
        "type": "mouse_drag",
        "from_ref": from_ref,
        "to_ref": to_ref,
    }
    outcome = await self._exec_with_snapshot(drag_action)

    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1831
+
1832
@action_logger
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press a key or a combination of keys.

    Supports single key press or combination of keys by concatenating
    them with '+' separator.

    Args:
        keys (List[str]): The keys to press, as a list of strings.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action, or an error
              message when `keys` is not a list of strings.
            - "snapshot" (str): A new page snapshot (empty string when
              `keys` is rejected).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    keys_are_valid = isinstance(keys, list) and all(
        isinstance(key, str) for key in keys
    )
    if not keys_are_valid:
        tabs = await self._get_tab_info_for_output()
        return {
            "result": "Error: Expected keys as a list of strings.",
            "snapshot": "",
            **tabs,
        }

    outcome = await self._exec_with_snapshot(
        {"type": "press_key", "keys": keys}
    )

    # Add tab information to the result
    outcome.update(await self._get_tab_info_for_output())
    return outcome
1867
+
1706
1868
  @action_logger
1707
1869
  async def browser_wait_user(
1708
1870
  self, timeout_sec: Optional[float] = None
@@ -1830,6 +1992,148 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1830
1992
  await agent.process_command(task_prompt, max_steps=max_steps)
1831
1993
  return "Task processing finished - see stdout for detailed trace."
1832
1994
 
1995
@action_logger
async def browser_console_view(self) -> Dict[str, Any]:
    r"""View the console logs recorded for the current page.

    Any failure while fetching the logs is logged as a warning and an
    empty list is returned instead of raising.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - console_messages (List[Dict]) : collection of logs from the
              browser console
    """
    try:
        # Copy into a list to make the output JSON serializable
        messages = list(await self._session.get_console_logs())
    except Exception as e:
        logger.warning(f"Failed to retrieve logs: {e}")
        messages = []
    return {"console_messages": messages}
2011
+
2012
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Execute JavaScript code in the console of the current page and get
    results.

    The code is first evaluated with ``eval`` (so an expression yields its
    value, like in a browser console). If it does not parse that way, for
    example because it uses a top-level ``return``, it is run once more as
    a function body. ``console.log`` calls made during execution are
    captured; other console methods are not.

    Args:
        code (str): JavaScript code for execution.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Result of the action, or a failure message
              if execution raised.
            - "console_output" (List[str]): Console log outputs during
              execution (empty on failure).
            - "snapshot" (str): A new page snapshot (empty on failure).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
    """
    # NOTE(review): the other browser_* actions in this class are wrapped
    # in @action_logger and this one is not; confirm that is intentional.
    import asyncio
    import json

    # The user code is passed to the page as an argument instead of being
    # spliced into the script text. This avoids relying on Python's
    # ``repr`` to produce a valid JavaScript string literal and keeps the
    # wrapper's own syntax independent of the user code.
    runner_script = """(code) => {
        const _logs = [];
        const originalLog = console.log;
        console.log = function(...args) {
            _logs.push(args.map(arg => {
                try {
                    return typeof arg === 'object' ?
                        JSON.stringify(arg) : String(arg);
                } catch (e) {
                    return String(arg);
                }
            }).join(' '));
            originalLog.apply(console, args);
        };

        try {
            let result;
            try {
                // First try to evaluate as an expression
                // (like browser console)
                result = eval(code);
            } catch (e) {
                // Retry as a function body only when the code did not
                // parse (e.g. a top-level `return`). Other errors are
                // re-thrown so the code is not executed a second time.
                if (!(e instanceof SyntaxError)) {
                    throw e;
                }
                result = new Function(code)();
            }
            return { result, logs: _logs };
        } finally {
            // Always restore the page's console.log, even on errors.
            console.log = originalLog;
        }
    }"""

    page = await self._require_page()

    try:
        logger.info("Executing JavaScript code in browser console.")
        exec_start = time.time()

        eval_result = await page.evaluate(runner_script, code)
        result = eval_result.get('result')
        console_logs = eval_result.get('logs', [])

        exec_time = time.time() - exec_start
        logger.info(f"Code execution completed in {exec_time:.2f}s.")

        # Short fixed pause before snapshotting (kept from the original
        # implementation; presumably lets the page settle).
        await asyncio.sleep(0.2)

        # Get snapshot
        logger.info("Capturing page snapshot after code execution.")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Code execution snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        # Properly serialize the result
        try:
            result_str = json.dumps(result, indent=2)
        except (TypeError, ValueError):
            result_str = str(result)

        return {
            "result": f"Code execution result: {result_str}",
            "console_output": console_logs,
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Code execution failed: {e}")
        # Get tab information for error case
        try:
            tab_info = await self._get_tab_info_for_output()
        except Exception:
            tab_info = {
                "tabs": [],
                "current_tab": 0,
                "total_tabs": 0,
            }

        return {
            "result": f"Code execution failed: {e}",
            "console_output": [],
            "snapshot": "",
            **tab_info,
        }
2136
+
1833
2137
  def get_log_summary(self) -> Dict[str, Any]:
1834
2138
  r"""Get a summary of logged actions."""
1835
2139
  if not self.log_buffer:
@@ -2045,11 +2349,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
2045
2349
  "browser_select": self.browser_select,
2046
2350
  "browser_scroll": self.browser_scroll,
2047
2351
  "browser_enter": self.browser_enter,
2352
+ "browser_mouse_control": self.browser_mouse_control,
2353
+ "browser_mouse_drag": self.browser_mouse_drag,
2354
+ "browser_press_key": self.browser_press_key,
2048
2355
  "browser_wait_user": self.browser_wait_user,
2049
2356
  "browser_solve_task": self.browser_solve_task,
2050
2357
  "browser_switch_tab": self.browser_switch_tab,
2051
2358
  "browser_close_tab": self.browser_close_tab,
2052
2359
  "browser_get_tab_info": self.browser_get_tab_info,
2360
+ "browser_console_view": self.browser_console_view,
2361
+ "browser_console_exec": self.browser_console_exec,
2053
2362
  }
2054
2363
 
2055
2364
  enabled_tools = []
@@ -43,7 +43,11 @@ class PageSnapshot:
43
43
  # Public API
44
44
  # ---------------------------------------------------------------------
45
45
  async def capture(
46
- self, *, force_refresh: bool = False, diff_only: bool = False
46
+ self,
47
+ *,
48
+ force_refresh: bool = False,
49
+ diff_only: bool = False,
50
+ viewport_limit: bool = False,
47
51
  ) -> str:
48
52
  """Return current snapshot or just the diff to previous one."""
49
53
  try:
@@ -65,7 +69,9 @@ class PageSnapshot:
65
69
  )
66
70
 
67
71
  logger.debug("Capturing page snapshot …")
68
- snapshot_result = await self._get_snapshot_direct()
72
+ snapshot_result = await self._get_snapshot_direct(
73
+ viewport_limit=viewport_limit
74
+ )
69
75
 
70
76
  # Extract snapshot text from the unified analyzer result
71
77
  if (
@@ -111,7 +117,7 @@ class PageSnapshot:
111
117
  _snapshot_js_cache: Optional[str] = None # class-level cache
112
118
 
113
119
  async def _get_snapshot_direct(
114
- self,
120
+ self, viewport_limit: bool = False
115
121
  ) -> Optional[Union[str, Dict[str, Any]]]:
116
122
  r"""Evaluate the snapshot-extraction JS with simple retry logic.
117
123
 
@@ -133,7 +139,7 @@ class PageSnapshot:
133
139
  retries: int = 3
134
140
  while retries > 0:
135
141
  try:
136
- return await self.page.evaluate(js_code)
142
+ return await self.page.evaluate(js_code, viewport_limit)
137
143
  except Exception as e:
138
144
  msg = str(e)
139
145