camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +2217 -519
  4. camel/agents/mcp_agent.py +30 -27
  5. camel/configs/__init__.py +15 -0
  6. camel/configs/aihubmix_config.py +88 -0
  7. camel/configs/amd_config.py +70 -0
  8. camel/configs/cometapi_config.py +104 -0
  9. camel/configs/minimax_config.py +93 -0
  10. camel/configs/nebius_config.py +103 -0
  11. camel/data_collectors/alpaca_collector.py +15 -6
  12. camel/datasets/base_generator.py +39 -10
  13. camel/environments/single_step.py +28 -3
  14. camel/environments/tic_tac_toe.py +1 -1
  15. camel/interpreters/__init__.py +2 -0
  16. camel/interpreters/docker/Dockerfile +3 -12
  17. camel/interpreters/e2b_interpreter.py +34 -1
  18. camel/interpreters/microsandbox_interpreter.py +395 -0
  19. camel/loaders/__init__.py +11 -2
  20. camel/loaders/chunkr_reader.py +9 -0
  21. camel/memories/agent_memories.py +48 -4
  22. camel/memories/base.py +26 -0
  23. camel/memories/blocks/chat_history_block.py +122 -4
  24. camel/memories/context_creators/score_based.py +25 -384
  25. camel/memories/records.py +88 -8
  26. camel/messages/base.py +153 -34
  27. camel/models/__init__.py +10 -0
  28. camel/models/aihubmix_model.py +83 -0
  29. camel/models/aiml_model.py +1 -16
  30. camel/models/amd_model.py +101 -0
  31. camel/models/anthropic_model.py +6 -19
  32. camel/models/aws_bedrock_model.py +2 -33
  33. camel/models/azure_openai_model.py +114 -89
  34. camel/models/base_audio_model.py +3 -1
  35. camel/models/base_model.py +32 -14
  36. camel/models/cohere_model.py +1 -16
  37. camel/models/cometapi_model.py +83 -0
  38. camel/models/crynux_model.py +1 -16
  39. camel/models/deepseek_model.py +1 -16
  40. camel/models/fish_audio_model.py +6 -0
  41. camel/models/gemini_model.py +36 -18
  42. camel/models/groq_model.py +1 -17
  43. camel/models/internlm_model.py +1 -16
  44. camel/models/litellm_model.py +1 -16
  45. camel/models/lmstudio_model.py +1 -17
  46. camel/models/minimax_model.py +83 -0
  47. camel/models/mistral_model.py +1 -16
  48. camel/models/model_factory.py +27 -1
  49. camel/models/modelscope_model.py +1 -16
  50. camel/models/moonshot_model.py +105 -24
  51. camel/models/nebius_model.py +83 -0
  52. camel/models/nemotron_model.py +0 -5
  53. camel/models/netmind_model.py +1 -16
  54. camel/models/novita_model.py +1 -16
  55. camel/models/nvidia_model.py +1 -16
  56. camel/models/ollama_model.py +4 -19
  57. camel/models/openai_compatible_model.py +62 -41
  58. camel/models/openai_model.py +62 -57
  59. camel/models/openrouter_model.py +1 -17
  60. camel/models/ppio_model.py +1 -16
  61. camel/models/qianfan_model.py +1 -16
  62. camel/models/qwen_model.py +1 -16
  63. camel/models/reka_model.py +1 -16
  64. camel/models/samba_model.py +34 -47
  65. camel/models/sglang_model.py +64 -31
  66. camel/models/siliconflow_model.py +1 -16
  67. camel/models/stub_model.py +0 -4
  68. camel/models/togetherai_model.py +1 -16
  69. camel/models/vllm_model.py +1 -16
  70. camel/models/volcano_model.py +0 -17
  71. camel/models/watsonx_model.py +1 -16
  72. camel/models/yi_model.py +1 -16
  73. camel/models/zhipuai_model.py +60 -16
  74. camel/parsers/__init__.py +18 -0
  75. camel/parsers/mcp_tool_call_parser.py +176 -0
  76. camel/retrievers/auto_retriever.py +1 -0
  77. camel/runtimes/daytona_runtime.py +11 -12
  78. camel/societies/__init__.py +2 -0
  79. camel/societies/workforce/__init__.py +2 -0
  80. camel/societies/workforce/events.py +122 -0
  81. camel/societies/workforce/prompts.py +146 -66
  82. camel/societies/workforce/role_playing_worker.py +15 -11
  83. camel/societies/workforce/single_agent_worker.py +302 -65
  84. camel/societies/workforce/structured_output_handler.py +30 -18
  85. camel/societies/workforce/task_channel.py +163 -27
  86. camel/societies/workforce/utils.py +107 -13
  87. camel/societies/workforce/workflow_memory_manager.py +772 -0
  88. camel/societies/workforce/workforce.py +1949 -579
  89. camel/societies/workforce/workforce_callback.py +74 -0
  90. camel/societies/workforce/workforce_logger.py +168 -145
  91. camel/societies/workforce/workforce_metrics.py +33 -0
  92. camel/storages/key_value_storages/json.py +15 -2
  93. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  94. camel/storages/object_storages/google_cloud.py +1 -1
  95. camel/storages/vectordb_storages/oceanbase.py +13 -13
  96. camel/storages/vectordb_storages/qdrant.py +3 -3
  97. camel/storages/vectordb_storages/tidb.py +8 -6
  98. camel/tasks/task.py +4 -3
  99. camel/toolkits/__init__.py +20 -7
  100. camel/toolkits/aci_toolkit.py +45 -0
  101. camel/toolkits/base.py +6 -4
  102. camel/toolkits/code_execution.py +28 -1
  103. camel/toolkits/context_summarizer_toolkit.py +684 -0
  104. camel/toolkits/dappier_toolkit.py +5 -1
  105. camel/toolkits/dingtalk.py +1135 -0
  106. camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
  107. camel/toolkits/excel_toolkit.py +1 -1
  108. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
  109. camel/toolkits/function_tool.py +13 -3
  110. camel/toolkits/github_toolkit.py +104 -17
  111. camel/toolkits/gmail_toolkit.py +1839 -0
  112. camel/toolkits/google_calendar_toolkit.py +38 -4
  113. camel/toolkits/google_drive_mcp_toolkit.py +12 -31
  114. camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
  115. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
  116. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
  117. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  118. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
  119. camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
  120. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
  121. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
  122. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
  123. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  124. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  125. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  126. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
  127. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
  128. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
  129. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  130. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  131. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  132. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
  133. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  134. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  135. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
  136. camel/toolkits/klavis_toolkit.py +5 -1
  137. camel/toolkits/markitdown_toolkit.py +27 -1
  138. camel/toolkits/math_toolkit.py +64 -10
  139. camel/toolkits/mcp_toolkit.py +366 -71
  140. camel/toolkits/memory_toolkit.py +5 -1
  141. camel/toolkits/message_integration.py +18 -13
  142. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  143. camel/toolkits/note_taking_toolkit.py +19 -10
  144. camel/toolkits/notion_mcp_toolkit.py +16 -26
  145. camel/toolkits/openbb_toolkit.py +5 -1
  146. camel/toolkits/origene_mcp_toolkit.py +8 -49
  147. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  148. camel/toolkits/resend_toolkit.py +168 -0
  149. camel/toolkits/search_toolkit.py +264 -91
  150. camel/toolkits/slack_toolkit.py +64 -10
  151. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  152. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  153. camel/toolkits/terminal_toolkit/utils.py +532 -0
  154. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  155. camel/toolkits/video_analysis_toolkit.py +17 -11
  156. camel/toolkits/wechat_official_toolkit.py +483 -0
  157. camel/toolkits/zapier_toolkit.py +5 -1
  158. camel/types/__init__.py +2 -2
  159. camel/types/enums.py +274 -7
  160. camel/types/openai_types.py +2 -2
  161. camel/types/unified_model_type.py +15 -0
  162. camel/utils/commons.py +36 -5
  163. camel/utils/constants.py +3 -0
  164. camel/utils/context_utils.py +1003 -0
  165. camel/utils/mcp.py +138 -4
  166. camel/utils/token_counting.py +43 -20
  167. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
  168. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
  169. camel/loaders/pandas_reader.py +0 -368
  170. camel/toolkits/openai_agent_toolkit.py +0 -135
  171. camel/toolkits/terminal_toolkit.py +0 -1550
  172. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  173. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -73,11 +73,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
73
73
  "browser_select",
74
74
  "browser_scroll",
75
75
  "browser_enter",
76
+ "browser_mouse_control",
77
+ "browser_mouse_drag",
78
+ "browser_press_key",
76
79
  "browser_wait_user",
77
80
  "browser_solve_task",
78
81
  "browser_switch_tab",
79
82
  "browser_close_tab",
80
83
  "browser_get_tab_info",
84
+ "browser_console_view",
85
+ "browser_console_exec",
81
86
  ]
82
87
 
83
88
  def __init__(
@@ -87,11 +92,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
87
92
  user_data_dir: Optional[str] = None,
88
93
  stealth: bool = False,
89
94
  web_agent_model: Optional[BaseModelBackend] = None,
90
- cache_dir: str = "tmp/",
95
+ cache_dir: Optional[str] = None,
91
96
  enabled_tools: Optional[List[str]] = None,
92
97
  browser_log_to_file: bool = False,
98
+ log_dir: Optional[str] = None,
93
99
  session_id: Optional[str] = None,
94
- default_start_url: str = "https://google.com/",
100
+ default_start_url: Optional[str] = None,
95
101
  default_timeout: Optional[int] = None,
96
102
  short_timeout: Optional[int] = None,
97
103
  navigation_timeout: Optional[int] = None,
@@ -99,6 +105,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
99
105
  screenshot_timeout: Optional[int] = None,
100
106
  page_stability_timeout: Optional[int] = None,
101
107
  dom_content_loaded_timeout: Optional[int] = None,
108
+ viewport_limit: bool = False,
102
109
  ) -> None:
103
110
  r"""Initialize the HybridBrowserToolkit.
104
111
 
@@ -138,6 +145,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
138
145
  and page loading times.
139
146
  Logs are saved to an auto-generated timestamped file.
140
147
  Defaults to `False`.
148
+ log_dir (Optional[str]): Custom directory path for log files.
149
+ If None, defaults to "browser_log". Defaults to `None`.
141
150
  session_id (Optional[str]): A unique identifier for this browser
142
151
  session. When multiple HybridBrowserToolkit instances are
143
152
  used
@@ -182,6 +191,10 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
182
191
  HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or defaults to
183
192
  5000ms.
184
193
  Defaults to `None`.
194
+ viewport_limit (bool): When True, only return snapshot results
195
+ visible in the current viewport. When False, return all
196
+ elements on the page regardless of visibility.
197
+ Defaults to `False`.
185
198
  """
186
199
  super().__init__()
187
200
  RegisteredAgentToolkit.__init__(self)
@@ -189,10 +202,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
189
202
  self._user_data_dir = user_data_dir
190
203
  self._stealth = stealth
191
204
  self._web_agent_model = web_agent_model
192
- self._cache_dir = cache_dir
205
+ self._cache_dir = cache_dir or "tmp/"
193
206
  self._browser_log_to_file = browser_log_to_file
194
- self._default_start_url = default_start_url
207
+ self._log_dir = log_dir
208
+ self._default_start_url = default_start_url or "https://google.com/"
195
209
  self._session_id = session_id or "default"
210
+ self._viewport_limit = viewport_limit
196
211
 
197
212
  # Store timeout configuration
198
213
  self._default_timeout = default_timeout
@@ -226,7 +241,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
226
241
  # Set up log file if needed
227
242
  if self.log_to_file:
228
243
  # Create log directory if it doesn't exist
229
- log_dir = "browser_log"
244
+ log_dir = self._log_dir if self._log_dir else "browser_log"
230
245
  os.makedirs(log_dir, exist_ok=True)
231
246
 
232
247
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -309,7 +324,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
309
324
  # Try to close browser with a timeout to prevent hanging
310
325
  try:
311
326
  loop.run_until_complete(
312
- asyncio.wait_for(self.close_browser(), timeout=2.0)
327
+ asyncio.wait_for(self.browser_close(), timeout=2.0)
313
328
  )
314
329
  except asyncio.TimeoutError:
315
330
  pass # Skip cleanup if it takes too long
@@ -550,7 +565,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
550
565
  )
551
566
 
552
567
  async def _get_unified_analysis(
553
- self, max_retries: int = 3
568
+ self, max_retries: int = 3, viewport_limit: Optional[bool] = None
554
569
  ) -> Dict[str, Any]:
555
570
  r"""Get unified analysis data from the page with retry mechanism for
556
571
  navigation issues."""
@@ -573,7 +588,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
573
588
  # Don't fail if DOM wait times out
574
589
  pass
575
590
 
576
- result = await page.evaluate(self._unified_script)
591
+ # Use instance viewport_limit if parameter not provided
592
+ use_viewport_limit = (
593
+ viewport_limit
594
+ if viewport_limit is not None
595
+ else self._viewport_limit
596
+ )
597
+ result = await page.evaluate(
598
+ self._unified_script, use_viewport_limit
599
+ )
577
600
 
578
601
  if not isinstance(result, dict):
579
602
  logger.warning(f"Invalid result type: {type(result)}")
@@ -1703,6 +1726,149 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1703
1726
 
1704
1727
  return result
1705
1728
 
1729
+ @action_logger
1730
+ async def browser_mouse_control(
1731
+ self, *, control: str, x: float, y: float
1732
+ ) -> Dict[str, Any]:
1733
+ r"""Control the mouse to interact with browser with x, y coordinates
1734
+
1735
+ Args:
1736
+ control (str): The action to perform: 'click', 'right_click'
1737
+ or 'dblclick'.
1738
+ x (float): x-coordinate for the control action.
1739
+ y (float): y-coordinate for the control action.
1740
+
1741
+ Returns:
1742
+ Dict[str, Any]: A dictionary with the result of the action:
1743
+ - "result" (str): Confirmation of the action.
1744
+ - "snapshot" (str): A new page snapshot.
1745
+ - "tabs" (List[Dict]): Information about all open tabs.
1746
+ - "current_tab" (int): Index of the active tab.
1747
+ - "total_tabs" (int): Total number of open tabs.
1748
+ """
1749
+ if control not in ("click", "right_click", "dblclick"):
1750
+ tab_info = await self._get_tab_info_for_output()
1751
+ return {
1752
+ "result": "Error: supported control actions are "
1753
+ "'click' or 'dblclick'",
1754
+ "snapshot": "",
1755
+ **tab_info,
1756
+ }
1757
+
1758
+ action = {"type": "mouse_control", "control": control, "x": x, "y": y}
1759
+
1760
+ result = await self._exec_with_snapshot(action)
1761
+
1762
+ # Add tab information to the result
1763
+ tab_info = await self._get_tab_info_for_output()
1764
+ result.update(tab_info)
1765
+
1766
+ return result
1767
+
1768
+ @action_logger
1769
+ async def browser_mouse_drag(
1770
+ self, *, from_ref: str, to_ref: str
1771
+ ) -> Dict[str, Any]:
1772
+ r"""Control the mouse to drag and drop in the browser using ref IDs.
1773
+
1774
+ Args:
1775
+ from_ref (str): The `ref` ID of the source element to drag from.
1776
+ to_ref (str): The `ref` ID of the target element to drag to.
1777
+
1778
+ Returns:
1779
+ Dict[str, Any]: A dictionary with the result of the action:
1780
+ - "result" (str): Confirmation of the action.
1781
+ - "snapshot" (str): A new page snapshot.
1782
+ - "tabs" (List[Dict]): Information about all open tabs.
1783
+ - "current_tab" (int): Index of the active tab.
1784
+ - "total_tabs" (int): Total number of open tabs.
1785
+ """
1786
+ # Validate refs
1787
+ self._validate_ref(from_ref, "drag source")
1788
+ self._validate_ref(to_ref, "drag target")
1789
+
1790
+ # Get element analysis to find coordinates
1791
+ analysis = await self._get_unified_analysis()
1792
+ elements = analysis.get("elements", {})
1793
+
1794
+ if from_ref not in elements:
1795
+ logger.error(
1796
+ f"Error: Source element reference '{from_ref}' not found."
1797
+ )
1798
+ snapshot = self._format_snapshot_from_analysis(analysis)
1799
+ tab_info = await self._get_tab_info_for_output()
1800
+ return {
1801
+ "result": (
1802
+ f"Error: Source element reference '{from_ref}' not found."
1803
+ ),
1804
+ "snapshot": snapshot,
1805
+ **tab_info,
1806
+ }
1807
+
1808
+ if to_ref not in elements:
1809
+ logger.error(
1810
+ f"Error: Target element reference '{to_ref}' not found."
1811
+ )
1812
+ snapshot = self._format_snapshot_from_analysis(analysis)
1813
+ tab_info = await self._get_tab_info_for_output()
1814
+ return {
1815
+ "result": (
1816
+ f"Error: Target element reference '{to_ref}' not found."
1817
+ ),
1818
+ "snapshot": snapshot,
1819
+ **tab_info,
1820
+ }
1821
+
1822
+ action = {
1823
+ "type": "mouse_drag",
1824
+ "from_ref": from_ref,
1825
+ "to_ref": to_ref,
1826
+ }
1827
+
1828
+ result = await self._exec_with_snapshot(action)
1829
+
1830
+ # Add tab information to the result
1831
+ tab_info = await self._get_tab_info_for_output()
1832
+ result.update(tab_info)
1833
+
1834
+ return result
1835
+
1836
+ @action_logger
1837
+ async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
1838
+ r"""Press key and key combinations.
1839
+ Supports single key press or combination of keys by concatenating
1840
+ them with '+' separator.
1841
+
1842
+ Args:
1843
+ keys (List[str]): key or list of keys.
1844
+
1845
+ Returns:
1846
+ Dict[str, Any]: A dictionary with the result of the action:
1847
+ - "result" (str): Confirmation of the action.
1848
+ - "snapshot" (str): A new page snapshot.
1849
+ - "tabs" (List[Dict]): Information about all open tabs.
1850
+ - "current_tab" (int): Index of the active tab.
1851
+ - "total_tabs" (int): Total number of open tabs.
1852
+ """
1853
+ if not isinstance(keys, list) or not all(
1854
+ isinstance(item, str) for item in keys
1855
+ ):
1856
+ tab_info = await self._get_tab_info_for_output()
1857
+ return {
1858
+ "result": "Error: Expected keys as a list of strings.",
1859
+ "snapshot": "",
1860
+ **tab_info,
1861
+ }
1862
+ action = {"type": "press_key", "keys": keys}
1863
+
1864
+ result = await self._exec_with_snapshot(action)
1865
+
1866
+ # Add tab information to the result
1867
+ tab_info = await self._get_tab_info_for_output()
1868
+ result.update(tab_info)
1869
+
1870
+ return result
1871
+
1706
1872
  @action_logger
1707
1873
  async def browser_wait_user(
1708
1874
  self, timeout_sec: Optional[float] = None
@@ -1830,6 +1996,148 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1830
1996
  await agent.process_command(task_prompt, max_steps=max_steps)
1831
1997
  return "Task processing finished - see stdout for detailed trace."
1832
1998
 
1999
+ @action_logger
2000
+ async def browser_console_view(self) -> Dict[str, Any]:
2001
+ r"""View current page console logs.
2002
+
2003
+ Returns:
2004
+ Dict[str, Any]: A dictionary with the result of the action:
2005
+ - console_messages (List[Dict]) : collection of logs from the
2006
+ browser console
2007
+ """
2008
+ try:
2009
+ logs = await self._session.get_console_logs()
2010
+ # make output JSON serializable
2011
+ return {"console_messages": list(logs)}
2012
+ except Exception as e:
2013
+ logger.warning(f"Failed to retrieve logs: {e}")
2014
+ return {"console_messages": []}
2015
+
2016
+ async def browser_console_exec(self, code: str) -> Dict[str, Any]:
2017
+ r"""Execute javascript code in the console of the current page and get
2018
+ results.
2019
+
2020
+ Args:
2021
+ code (str): JavaScript code for execution.
2022
+
2023
+ Returns:
2024
+ Dict[str, Any]: A dictionary with the result of the action:
2025
+ - "result" (str): Result of the action.
2026
+ - "console_output" (List[str]): Console log outputs during
2027
+ execution.
2028
+ - "snapshot" (str): A new page snapshot.
2029
+ - "tabs" (List[Dict]): Information about all open tabs.
2030
+ - "current_tab" (int): Index of the active tab.
2031
+ - "total_tabs" (int): Total number of open tabs.
2032
+ """
2033
+ page = await self._require_page()
2034
+
2035
+ try:
2036
+ logger.info("Executing JavaScript code in browser console.")
2037
+ exec_start = time.time()
2038
+
2039
+ # Wrap the code to capture console.log output and handle
2040
+ # expressions
2041
+ wrapped_code = (
2042
+ """
2043
+ (function() {
2044
+ const _logs = [];
2045
+ const originalLog = console.log;
2046
+ console.log = function(...args) {
2047
+ _logs.push(args.map(arg => {
2048
+ try {
2049
+ return typeof arg === 'object' ?
2050
+ JSON.stringify(arg) : String(arg);
2051
+ } catch (e) {
2052
+ return String(arg);
2053
+ }
2054
+ }).join(' '));
2055
+ originalLog.apply(console, args);
2056
+ };
2057
+
2058
+ let result;
2059
+ try {
2060
+ // First try to evaluate as an expression
2061
+ // (like browser console)
2062
+ result = eval("""
2063
+ + repr(code)
2064
+ + """);
2065
+ } catch (e) {
2066
+ // If that fails, execute as statements
2067
+ try {
2068
+ result = (function() { """
2069
+ + code
2070
+ + """ })();
2071
+ } catch (error) {
2072
+ console.log = originalLog;
2073
+ throw error;
2074
+ }
2075
+ }
2076
+
2077
+ console.log = originalLog;
2078
+ return { result, logs: _logs };
2079
+ })()
2080
+ """
2081
+ )
2082
+
2083
+ eval_result = await page.evaluate(wrapped_code)
2084
+ result = eval_result.get('result')
2085
+ console_logs = eval_result.get('logs', [])
2086
+
2087
+ exec_time = time.time() - exec_start
2088
+ logger.info(f"Code execution completed in {exec_time:.2f}s.")
2089
+
2090
+ import asyncio
2091
+ import json
2092
+
2093
+ await asyncio.sleep(0.2)
2094
+
2095
+ # Get snapshot
2096
+ logger.info("Capturing page snapshot after code execution.")
2097
+ snapshot_start = time.time()
2098
+ snapshot = await self._session.get_snapshot(
2099
+ force_refresh=True, diff_only=False
2100
+ )
2101
+ snapshot_time = time.time() - snapshot_start
2102
+ logger.info(
2103
+ f"Code execution snapshot captured in " f"{snapshot_time:.2f}s"
2104
+ )
2105
+
2106
+ # Get tab information
2107
+ tab_info = await self._get_tab_info_for_output()
2108
+
2109
+ # Properly serialize the result
2110
+ try:
2111
+ result_str = json.dumps(result, indent=2)
2112
+ except (TypeError, ValueError):
2113
+ result_str = str(result)
2114
+
2115
+ return {
2116
+ "result": f"Code execution result: {result_str}",
2117
+ "console_output": console_logs,
2118
+ "snapshot": snapshot,
2119
+ **tab_info,
2120
+ }
2121
+
2122
+ except Exception as e:
2123
+ logger.warning(f"Code execution failed: {e}")
2124
+ # Get tab information for error case
2125
+ try:
2126
+ tab_info = await self._get_tab_info_for_output()
2127
+ except Exception:
2128
+ tab_info = {
2129
+ "tabs": [],
2130
+ "current_tab": 0,
2131
+ "total_tabs": 0,
2132
+ }
2133
+
2134
+ return {
2135
+ "result": f"Code execution failed: {e}",
2136
+ "console_output": [],
2137
+ "snapshot": "",
2138
+ **tab_info,
2139
+ }
2140
+
1833
2141
  def get_log_summary(self) -> Dict[str, Any]:
1834
2142
  r"""Get a summary of logged actions."""
1835
2143
  if not self.log_buffer:
@@ -2045,11 +2353,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
2045
2353
  "browser_select": self.browser_select,
2046
2354
  "browser_scroll": self.browser_scroll,
2047
2355
  "browser_enter": self.browser_enter,
2356
+ "browser_mouse_control": self.browser_mouse_control,
2357
+ "browser_mouse_drag": self.browser_mouse_drag,
2358
+ "browser_press_key": self.browser_press_key,
2048
2359
  "browser_wait_user": self.browser_wait_user,
2049
2360
  "browser_solve_task": self.browser_solve_task,
2050
2361
  "browser_switch_tab": self.browser_switch_tab,
2051
2362
  "browser_close_tab": self.browser_close_tab,
2052
2363
  "browser_get_tab_info": self.browser_get_tab_info,
2364
+ "browser_console_view": self.browser_console_view,
2365
+ "browser_console_exec": self.browser_console_exec,
2053
2366
  }
2054
2367
 
2055
2368
  enabled_tools = []
@@ -43,7 +43,11 @@ class PageSnapshot:
43
43
  # Public API
44
44
  # ---------------------------------------------------------------------
45
45
  async def capture(
46
- self, *, force_refresh: bool = False, diff_only: bool = False
46
+ self,
47
+ *,
48
+ force_refresh: bool = False,
49
+ diff_only: bool = False,
50
+ viewport_limit: bool = False,
47
51
  ) -> str:
48
52
  """Return current snapshot or just the diff to previous one."""
49
53
  try:
@@ -65,7 +69,9 @@ class PageSnapshot:
65
69
  )
66
70
 
67
71
  logger.debug("Capturing page snapshot …")
68
- snapshot_result = await self._get_snapshot_direct()
72
+ snapshot_result = await self._get_snapshot_direct(
73
+ viewport_limit=viewport_limit
74
+ )
69
75
 
70
76
  # Extract snapshot text from the unified analyzer result
71
77
  if (
@@ -111,7 +117,7 @@ class PageSnapshot:
111
117
  _snapshot_js_cache: Optional[str] = None # class-level cache
112
118
 
113
119
  async def _get_snapshot_direct(
114
- self,
120
+ self, viewport_limit: bool = False
115
121
  ) -> Optional[Union[str, Dict[str, Any]]]:
116
122
  r"""Evaluate the snapshot-extraction JS with simple retry logic.
117
123
 
@@ -133,7 +139,7 @@ class PageSnapshot:
133
139
  retries: int = 3
134
140
  while retries > 0:
135
141
  try:
136
- return await self.page.evaluate(js_code)
142
+ return await self.page.evaluate(js_code, viewport_limit)
137
143
  except Exception as e:
138
144
  msg = str(e)
139
145
 
@@ -1,4 +1,4 @@
1
- (() => {
1
+ ((viewport_limit = false) => {
2
2
  // Unified analyzer that combines visual and structural analysis
3
3
  // Preserves complete snapshot.js logic while adding visual coordinate information
4
4
 
@@ -406,6 +406,11 @@
406
406
  if (tagName === 'header') return 'banner';
407
407
  if (tagName === 'footer') return 'contentinfo';
408
408
  if (tagName === 'fieldset') return 'group';
409
+
410
+ // Enhanced role mappings for table elements
411
+ if (tagName === 'table') return 'table';
412
+ if (tagName === 'tr') return 'row';
413
+ if (tagName === 'td' || tagName === 'th') return 'cell';
409
414
 
410
415
  return 'generic';
411
416
  }
@@ -484,6 +489,9 @@
484
489
 
485
490
  // Add a heuristic to ignore code-like text that might be in the DOM
486
491
  if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
492
+
493
+
494
+
487
495
  return text;
488
496
  }
489
497
 
@@ -578,6 +586,8 @@
578
586
  const level = getAriaLevel(element);
579
587
  if (level > 0) node.level = level;
580
588
 
589
+
590
+
581
591
  return node;
582
592
  }
583
593
 
@@ -725,6 +735,9 @@
725
735
  if (isRedundantWrapper) {
726
736
  return node.children;
727
737
  }
738
+
739
+
740
+
728
741
  return [node];
729
742
  }
730
743
 
@@ -815,6 +828,23 @@
815
828
 
816
829
  // === Visual analysis functions from page_script.js ===
817
830
 
831
+ // Check if element is within the current viewport
832
+ function isInViewport(element) {
833
+ if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;
834
+
835
+ try {
836
+ const rect = element.getBoundingClientRect();
837
+ return (
838
+ rect.top >= 0 &&
839
+ rect.left >= 0 &&
840
+ rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
841
+ rect.right <= (window.innerWidth || document.documentElement.clientWidth)
842
+ );
843
+ } catch (e) {
844
+ return false;
845
+ }
846
+ }
847
+
818
848
  // From page_script.js - check if element is topmost at coordinates
819
849
  function isTopmost(element, x, y) {
820
850
  let hit = document.elementFromPoint(x, y);
@@ -855,10 +885,21 @@
855
885
 
856
886
  // === Unified analysis function ===
857
887
 
858
- function collectElementsFromTree(node, elementsMap) {
888
+ function collectElementsFromTree(node, elementsMap, viewportLimitEnabled = false) {
859
889
  if (typeof node === 'string') return;
860
890
 
861
891
  if (node.element && node.ref) {
892
+ // If viewport_limit is enabled, only include elements that are in the viewport
893
+ if (viewportLimitEnabled && !isInViewport(node.element)) {
894
+ // Skip this element but still process its children
895
+ if (node.children) {
896
+ for (const child of node.children) {
897
+ collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
898
+ }
899
+ }
900
+ return;
901
+ }
902
+
862
903
  // Get visual coordinates for this element
863
904
  const coordinates = getElementCoordinates(node.element);
864
905
 
@@ -891,7 +932,7 @@
891
932
  // Recursively process children
892
933
  if (node.children) {
893
934
  for (const child of node.children) {
894
- collectElementsFromTree(child, elementsMap);
935
+ collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
895
936
  }
896
937
  }
897
938
  }
@@ -931,7 +972,7 @@
931
972
  [tree] = normalizeTree(tree);
932
973
 
933
974
  const elementsMap = {};
934
- collectElementsFromTree(tree, elementsMap);
975
+ collectElementsFromTree(tree, elementsMap, viewport_limit);
935
976
 
936
977
  // Verify uniqueness of aria-ref attributes (debugging aid)
937
978
  const ariaRefCounts = {};