camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -73,11 +73,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
73
73
|
"browser_select",
|
|
74
74
|
"browser_scroll",
|
|
75
75
|
"browser_enter",
|
|
76
|
+
"browser_mouse_control",
|
|
77
|
+
"browser_mouse_drag",
|
|
78
|
+
"browser_press_key",
|
|
76
79
|
"browser_wait_user",
|
|
77
80
|
"browser_solve_task",
|
|
78
81
|
"browser_switch_tab",
|
|
79
82
|
"browser_close_tab",
|
|
80
83
|
"browser_get_tab_info",
|
|
84
|
+
"browser_console_view",
|
|
85
|
+
"browser_console_exec",
|
|
81
86
|
]
|
|
82
87
|
|
|
83
88
|
def __init__(
|
|
@@ -87,11 +92,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
87
92
|
user_data_dir: Optional[str] = None,
|
|
88
93
|
stealth: bool = False,
|
|
89
94
|
web_agent_model: Optional[BaseModelBackend] = None,
|
|
90
|
-
cache_dir: str = "tmp/",
|
|
95
|
+
cache_dir: Optional[str] = None,
|
|
91
96
|
enabled_tools: Optional[List[str]] = None,
|
|
92
97
|
browser_log_to_file: bool = False,
|
|
98
|
+
log_dir: Optional[str] = None,
|
|
93
99
|
session_id: Optional[str] = None,
|
|
94
|
-
default_start_url: str = "https://google.com/",
|
|
100
|
+
default_start_url: Optional[str] = None,
|
|
95
101
|
default_timeout: Optional[int] = None,
|
|
96
102
|
short_timeout: Optional[int] = None,
|
|
97
103
|
navigation_timeout: Optional[int] = None,
|
|
@@ -99,6 +105,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
99
105
|
screenshot_timeout: Optional[int] = None,
|
|
100
106
|
page_stability_timeout: Optional[int] = None,
|
|
101
107
|
dom_content_loaded_timeout: Optional[int] = None,
|
|
108
|
+
viewport_limit: bool = False,
|
|
102
109
|
) -> None:
|
|
103
110
|
r"""Initialize the HybridBrowserToolkit.
|
|
104
111
|
|
|
@@ -138,6 +145,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
138
145
|
and page loading times.
|
|
139
146
|
Logs are saved to an auto-generated timestamped file.
|
|
140
147
|
Defaults to `False`.
|
|
148
|
+
log_dir (Optional[str]): Custom directory path for log files.
|
|
149
|
+
If None, defaults to "browser_log". Defaults to `None`.
|
|
141
150
|
session_id (Optional[str]): A unique identifier for this browser
|
|
142
151
|
session. When multiple HybridBrowserToolkit instances are
|
|
143
152
|
used
|
|
@@ -182,6 +191,10 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
182
191
|
HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or defaults to
|
|
183
192
|
5000ms.
|
|
184
193
|
Defaults to `None`.
|
|
194
|
+
viewport_limit (bool): When True, only return snapshot results
|
|
195
|
+
visible in the current viewport. When False, return all
|
|
196
|
+
elements on the page regardless of visibility.
|
|
197
|
+
Defaults to `False`.
|
|
185
198
|
"""
|
|
186
199
|
super().__init__()
|
|
187
200
|
RegisteredAgentToolkit.__init__(self)
|
|
@@ -189,10 +202,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
189
202
|
self._user_data_dir = user_data_dir
|
|
190
203
|
self._stealth = stealth
|
|
191
204
|
self._web_agent_model = web_agent_model
|
|
192
|
-
self._cache_dir = cache_dir
|
|
205
|
+
self._cache_dir = cache_dir or "tmp/"
|
|
193
206
|
self._browser_log_to_file = browser_log_to_file
|
|
194
|
-
self._default_start_url = default_start_url
|
|
207
|
+
self._log_dir = log_dir
|
|
208
|
+
self._default_start_url = default_start_url or "https://google.com/"
|
|
195
209
|
self._session_id = session_id or "default"
|
|
210
|
+
self._viewport_limit = viewport_limit
|
|
196
211
|
|
|
197
212
|
# Store timeout configuration
|
|
198
213
|
self._default_timeout = default_timeout
|
|
@@ -226,7 +241,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
226
241
|
# Set up log file if needed
|
|
227
242
|
if self.log_to_file:
|
|
228
243
|
# Create log directory if it doesn't exist
|
|
229
|
-
log_dir = "browser_log"
|
|
244
|
+
log_dir = self._log_dir if self._log_dir else "browser_log"
|
|
230
245
|
os.makedirs(log_dir, exist_ok=True)
|
|
231
246
|
|
|
232
247
|
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
@@ -309,7 +324,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
309
324
|
# Try to close browser with a timeout to prevent hanging
|
|
310
325
|
try:
|
|
311
326
|
loop.run_until_complete(
|
|
312
|
-
asyncio.wait_for(self.
|
|
327
|
+
asyncio.wait_for(self.browser_close(), timeout=2.0)
|
|
313
328
|
)
|
|
314
329
|
except asyncio.TimeoutError:
|
|
315
330
|
pass # Skip cleanup if it takes too long
|
|
@@ -550,7 +565,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
550
565
|
)
|
|
551
566
|
|
|
552
567
|
async def _get_unified_analysis(
|
|
553
|
-
self, max_retries: int = 3
|
|
568
|
+
self, max_retries: int = 3, viewport_limit: Optional[bool] = None
|
|
554
569
|
) -> Dict[str, Any]:
|
|
555
570
|
r"""Get unified analysis data from the page with retry mechanism for
|
|
556
571
|
navigation issues."""
|
|
@@ -573,7 +588,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
573
588
|
# Don't fail if DOM wait times out
|
|
574
589
|
pass
|
|
575
590
|
|
|
576
|
-
|
|
591
|
+
# Use instance viewport_limit if parameter not provided
|
|
592
|
+
use_viewport_limit = (
|
|
593
|
+
viewport_limit
|
|
594
|
+
if viewport_limit is not None
|
|
595
|
+
else self._viewport_limit
|
|
596
|
+
)
|
|
597
|
+
result = await page.evaluate(
|
|
598
|
+
self._unified_script, use_viewport_limit
|
|
599
|
+
)
|
|
577
600
|
|
|
578
601
|
if not isinstance(result, dict):
|
|
579
602
|
logger.warning(f"Invalid result type: {type(result)}")
|
|
@@ -1703,6 +1726,149 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1703
1726
|
|
|
1704
1727
|
return result
|
|
1705
1728
|
|
|
1729
|
+
@action_logger
|
|
1730
|
+
async def browser_mouse_control(
|
|
1731
|
+
self, *, control: str, x: float, y: float
|
|
1732
|
+
) -> Dict[str, Any]:
|
|
1733
|
+
r"""Control the mouse to interact with browser with x, y coordinates
|
|
1734
|
+
|
|
1735
|
+
Args:
|
|
1736
|
+
control (str): The action to perform: 'click', 'right_click'
|
|
1737
|
+
or 'dblclick'.
|
|
1738
|
+
x (float): x-coordinate for the control action.
|
|
1739
|
+
y (float): y-coordinate for the control action.
|
|
1740
|
+
|
|
1741
|
+
Returns:
|
|
1742
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1743
|
+
- "result" (str): Confirmation of the action.
|
|
1744
|
+
- "snapshot" (str): A new page snapshot.
|
|
1745
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1746
|
+
- "current_tab" (int): Index of the active tab.
|
|
1747
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1748
|
+
"""
|
|
1749
|
+
if control not in ("click", "right_click", "dblclick"):
|
|
1750
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1751
|
+
return {
|
|
1752
|
+
"result": "Error: supported control actions are "
|
|
1753
|
+
"'click' or 'dblclick'",
|
|
1754
|
+
"snapshot": "",
|
|
1755
|
+
**tab_info,
|
|
1756
|
+
}
|
|
1757
|
+
|
|
1758
|
+
action = {"type": "mouse_control", "control": control, "x": x, "y": y}
|
|
1759
|
+
|
|
1760
|
+
result = await self._exec_with_snapshot(action)
|
|
1761
|
+
|
|
1762
|
+
# Add tab information to the result
|
|
1763
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1764
|
+
result.update(tab_info)
|
|
1765
|
+
|
|
1766
|
+
return result
|
|
1767
|
+
|
|
1768
|
+
@action_logger
|
|
1769
|
+
async def browser_mouse_drag(
|
|
1770
|
+
self, *, from_ref: str, to_ref: str
|
|
1771
|
+
) -> Dict[str, Any]:
|
|
1772
|
+
r"""Control the mouse to drag and drop in the browser using ref IDs.
|
|
1773
|
+
|
|
1774
|
+
Args:
|
|
1775
|
+
from_ref (str): The `ref` ID of the source element to drag from.
|
|
1776
|
+
to_ref (str): The `ref` ID of the target element to drag to.
|
|
1777
|
+
|
|
1778
|
+
Returns:
|
|
1779
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1780
|
+
- "result" (str): Confirmation of the action.
|
|
1781
|
+
- "snapshot" (str): A new page snapshot.
|
|
1782
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1783
|
+
- "current_tab" (int): Index of the active tab.
|
|
1784
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1785
|
+
"""
|
|
1786
|
+
# Validate refs
|
|
1787
|
+
self._validate_ref(from_ref, "drag source")
|
|
1788
|
+
self._validate_ref(to_ref, "drag target")
|
|
1789
|
+
|
|
1790
|
+
# Get element analysis to find coordinates
|
|
1791
|
+
analysis = await self._get_unified_analysis()
|
|
1792
|
+
elements = analysis.get("elements", {})
|
|
1793
|
+
|
|
1794
|
+
if from_ref not in elements:
|
|
1795
|
+
logger.error(
|
|
1796
|
+
f"Error: Source element reference '{from_ref}' not found."
|
|
1797
|
+
)
|
|
1798
|
+
snapshot = self._format_snapshot_from_analysis(analysis)
|
|
1799
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1800
|
+
return {
|
|
1801
|
+
"result": (
|
|
1802
|
+
f"Error: Source element reference '{from_ref}' not found."
|
|
1803
|
+
),
|
|
1804
|
+
"snapshot": snapshot,
|
|
1805
|
+
**tab_info,
|
|
1806
|
+
}
|
|
1807
|
+
|
|
1808
|
+
if to_ref not in elements:
|
|
1809
|
+
logger.error(
|
|
1810
|
+
f"Error: Target element reference '{to_ref}' not found."
|
|
1811
|
+
)
|
|
1812
|
+
snapshot = self._format_snapshot_from_analysis(analysis)
|
|
1813
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1814
|
+
return {
|
|
1815
|
+
"result": (
|
|
1816
|
+
f"Error: Target element reference '{to_ref}' not found."
|
|
1817
|
+
),
|
|
1818
|
+
"snapshot": snapshot,
|
|
1819
|
+
**tab_info,
|
|
1820
|
+
}
|
|
1821
|
+
|
|
1822
|
+
action = {
|
|
1823
|
+
"type": "mouse_drag",
|
|
1824
|
+
"from_ref": from_ref,
|
|
1825
|
+
"to_ref": to_ref,
|
|
1826
|
+
}
|
|
1827
|
+
|
|
1828
|
+
result = await self._exec_with_snapshot(action)
|
|
1829
|
+
|
|
1830
|
+
# Add tab information to the result
|
|
1831
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1832
|
+
result.update(tab_info)
|
|
1833
|
+
|
|
1834
|
+
return result
|
|
1835
|
+
|
|
1836
|
+
@action_logger
|
|
1837
|
+
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
|
|
1838
|
+
r"""Press key and key combinations.
|
|
1839
|
+
Supports single key press or combination of keys by concatenating
|
|
1840
|
+
them with '+' separator.
|
|
1841
|
+
|
|
1842
|
+
Args:
|
|
1843
|
+
keys (List[str]): key or list of keys.
|
|
1844
|
+
|
|
1845
|
+
Returns:
|
|
1846
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1847
|
+
- "result" (str): Confirmation of the action.
|
|
1848
|
+
- "snapshot" (str): A new page snapshot.
|
|
1849
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1850
|
+
- "current_tab" (int): Index of the active tab.
|
|
1851
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1852
|
+
"""
|
|
1853
|
+
if not isinstance(keys, list) or not all(
|
|
1854
|
+
isinstance(item, str) for item in keys
|
|
1855
|
+
):
|
|
1856
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1857
|
+
return {
|
|
1858
|
+
"result": "Error: Expected keys as a list of strings.",
|
|
1859
|
+
"snapshot": "",
|
|
1860
|
+
**tab_info,
|
|
1861
|
+
}
|
|
1862
|
+
action = {"type": "press_key", "keys": keys}
|
|
1863
|
+
|
|
1864
|
+
result = await self._exec_with_snapshot(action)
|
|
1865
|
+
|
|
1866
|
+
# Add tab information to the result
|
|
1867
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1868
|
+
result.update(tab_info)
|
|
1869
|
+
|
|
1870
|
+
return result
|
|
1871
|
+
|
|
1706
1872
|
@action_logger
|
|
1707
1873
|
async def browser_wait_user(
|
|
1708
1874
|
self, timeout_sec: Optional[float] = None
|
|
@@ -1830,6 +1996,148 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1830
1996
|
await agent.process_command(task_prompt, max_steps=max_steps)
|
|
1831
1997
|
return "Task processing finished - see stdout for detailed trace."
|
|
1832
1998
|
|
|
1999
|
+
@action_logger
|
|
2000
|
+
async def browser_console_view(self) -> Dict[str, Any]:
|
|
2001
|
+
r"""View current page console logs.
|
|
2002
|
+
|
|
2003
|
+
Returns:
|
|
2004
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
2005
|
+
- console_messages (List[Dict]) : collection of logs from the
|
|
2006
|
+
browser console
|
|
2007
|
+
"""
|
|
2008
|
+
try:
|
|
2009
|
+
logs = await self._session.get_console_logs()
|
|
2010
|
+
# make output JSON serializable
|
|
2011
|
+
return {"console_messages": list(logs)}
|
|
2012
|
+
except Exception as e:
|
|
2013
|
+
logger.warning(f"Failed to retrieve logs: {e}")
|
|
2014
|
+
return {"console_messages": []}
|
|
2015
|
+
|
|
2016
|
+
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
|
|
2017
|
+
r"""Execute javascript code in the console of the current page and get
|
|
2018
|
+
results.
|
|
2019
|
+
|
|
2020
|
+
Args:
|
|
2021
|
+
code (str): JavaScript code for execution.
|
|
2022
|
+
|
|
2023
|
+
Returns:
|
|
2024
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
2025
|
+
- "result" (str): Result of the action.
|
|
2026
|
+
- "console_output" (List[str]): Console log outputs during
|
|
2027
|
+
execution.
|
|
2028
|
+
- "snapshot" (str): A new page snapshot.
|
|
2029
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
2030
|
+
- "current_tab" (int): Index of the active tab.
|
|
2031
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
2032
|
+
"""
|
|
2033
|
+
page = await self._require_page()
|
|
2034
|
+
|
|
2035
|
+
try:
|
|
2036
|
+
logger.info("Executing JavaScript code in browser console.")
|
|
2037
|
+
exec_start = time.time()
|
|
2038
|
+
|
|
2039
|
+
# Wrap the code to capture console.log output and handle
|
|
2040
|
+
# expressions
|
|
2041
|
+
wrapped_code = (
|
|
2042
|
+
"""
|
|
2043
|
+
(function() {
|
|
2044
|
+
const _logs = [];
|
|
2045
|
+
const originalLog = console.log;
|
|
2046
|
+
console.log = function(...args) {
|
|
2047
|
+
_logs.push(args.map(arg => {
|
|
2048
|
+
try {
|
|
2049
|
+
return typeof arg === 'object' ?
|
|
2050
|
+
JSON.stringify(arg) : String(arg);
|
|
2051
|
+
} catch (e) {
|
|
2052
|
+
return String(arg);
|
|
2053
|
+
}
|
|
2054
|
+
}).join(' '));
|
|
2055
|
+
originalLog.apply(console, args);
|
|
2056
|
+
};
|
|
2057
|
+
|
|
2058
|
+
let result;
|
|
2059
|
+
try {
|
|
2060
|
+
// First try to evaluate as an expression
|
|
2061
|
+
// (like browser console)
|
|
2062
|
+
result = eval("""
|
|
2063
|
+
+ repr(code)
|
|
2064
|
+
+ """);
|
|
2065
|
+
} catch (e) {
|
|
2066
|
+
// If that fails, execute as statements
|
|
2067
|
+
try {
|
|
2068
|
+
result = (function() { """
|
|
2069
|
+
+ code
|
|
2070
|
+
+ """ })();
|
|
2071
|
+
} catch (error) {
|
|
2072
|
+
console.log = originalLog;
|
|
2073
|
+
throw error;
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
console.log = originalLog;
|
|
2078
|
+
return { result, logs: _logs };
|
|
2079
|
+
})()
|
|
2080
|
+
"""
|
|
2081
|
+
)
|
|
2082
|
+
|
|
2083
|
+
eval_result = await page.evaluate(wrapped_code)
|
|
2084
|
+
result = eval_result.get('result')
|
|
2085
|
+
console_logs = eval_result.get('logs', [])
|
|
2086
|
+
|
|
2087
|
+
exec_time = time.time() - exec_start
|
|
2088
|
+
logger.info(f"Code execution completed in {exec_time:.2f}s.")
|
|
2089
|
+
|
|
2090
|
+
import asyncio
|
|
2091
|
+
import json
|
|
2092
|
+
|
|
2093
|
+
await asyncio.sleep(0.2)
|
|
2094
|
+
|
|
2095
|
+
# Get snapshot
|
|
2096
|
+
logger.info("Capturing page snapshot after code execution.")
|
|
2097
|
+
snapshot_start = time.time()
|
|
2098
|
+
snapshot = await self._session.get_snapshot(
|
|
2099
|
+
force_refresh=True, diff_only=False
|
|
2100
|
+
)
|
|
2101
|
+
snapshot_time = time.time() - snapshot_start
|
|
2102
|
+
logger.info(
|
|
2103
|
+
f"Code execution snapshot captured in " f"{snapshot_time:.2f}s"
|
|
2104
|
+
)
|
|
2105
|
+
|
|
2106
|
+
# Get tab information
|
|
2107
|
+
tab_info = await self._get_tab_info_for_output()
|
|
2108
|
+
|
|
2109
|
+
# Properly serialize the result
|
|
2110
|
+
try:
|
|
2111
|
+
result_str = json.dumps(result, indent=2)
|
|
2112
|
+
except (TypeError, ValueError):
|
|
2113
|
+
result_str = str(result)
|
|
2114
|
+
|
|
2115
|
+
return {
|
|
2116
|
+
"result": f"Code execution result: {result_str}",
|
|
2117
|
+
"console_output": console_logs,
|
|
2118
|
+
"snapshot": snapshot,
|
|
2119
|
+
**tab_info,
|
|
2120
|
+
}
|
|
2121
|
+
|
|
2122
|
+
except Exception as e:
|
|
2123
|
+
logger.warning(f"Code execution failed: {e}")
|
|
2124
|
+
# Get tab information for error case
|
|
2125
|
+
try:
|
|
2126
|
+
tab_info = await self._get_tab_info_for_output()
|
|
2127
|
+
except Exception:
|
|
2128
|
+
tab_info = {
|
|
2129
|
+
"tabs": [],
|
|
2130
|
+
"current_tab": 0,
|
|
2131
|
+
"total_tabs": 0,
|
|
2132
|
+
}
|
|
2133
|
+
|
|
2134
|
+
return {
|
|
2135
|
+
"result": f"Code execution failed: {e}",
|
|
2136
|
+
"console_output": [],
|
|
2137
|
+
"snapshot": "",
|
|
2138
|
+
**tab_info,
|
|
2139
|
+
}
|
|
2140
|
+
|
|
1833
2141
|
def get_log_summary(self) -> Dict[str, Any]:
|
|
1834
2142
|
r"""Get a summary of logged actions."""
|
|
1835
2143
|
if not self.log_buffer:
|
|
@@ -2045,11 +2353,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
2045
2353
|
"browser_select": self.browser_select,
|
|
2046
2354
|
"browser_scroll": self.browser_scroll,
|
|
2047
2355
|
"browser_enter": self.browser_enter,
|
|
2356
|
+
"browser_mouse_control": self.browser_mouse_control,
|
|
2357
|
+
"browser_mouse_drag": self.browser_mouse_drag,
|
|
2358
|
+
"browser_press_key": self.browser_press_key,
|
|
2048
2359
|
"browser_wait_user": self.browser_wait_user,
|
|
2049
2360
|
"browser_solve_task": self.browser_solve_task,
|
|
2050
2361
|
"browser_switch_tab": self.browser_switch_tab,
|
|
2051
2362
|
"browser_close_tab": self.browser_close_tab,
|
|
2052
2363
|
"browser_get_tab_info": self.browser_get_tab_info,
|
|
2364
|
+
"browser_console_view": self.browser_console_view,
|
|
2365
|
+
"browser_console_exec": self.browser_console_exec,
|
|
2053
2366
|
}
|
|
2054
2367
|
|
|
2055
2368
|
enabled_tools = []
|
|
@@ -43,7 +43,11 @@ class PageSnapshot:
|
|
|
43
43
|
# Public API
|
|
44
44
|
# ---------------------------------------------------------------------
|
|
45
45
|
async def capture(
|
|
46
|
-
self,
|
|
46
|
+
self,
|
|
47
|
+
*,
|
|
48
|
+
force_refresh: bool = False,
|
|
49
|
+
diff_only: bool = False,
|
|
50
|
+
viewport_limit: bool = False,
|
|
47
51
|
) -> str:
|
|
48
52
|
"""Return current snapshot or just the diff to previous one."""
|
|
49
53
|
try:
|
|
@@ -65,7 +69,9 @@ class PageSnapshot:
|
|
|
65
69
|
)
|
|
66
70
|
|
|
67
71
|
logger.debug("Capturing page snapshot …")
|
|
68
|
-
snapshot_result = await self._get_snapshot_direct(
|
|
72
|
+
snapshot_result = await self._get_snapshot_direct(
|
|
73
|
+
viewport_limit=viewport_limit
|
|
74
|
+
)
|
|
69
75
|
|
|
70
76
|
# Extract snapshot text from the unified analyzer result
|
|
71
77
|
if (
|
|
@@ -111,7 +117,7 @@ class PageSnapshot:
|
|
|
111
117
|
_snapshot_js_cache: Optional[str] = None # class-level cache
|
|
112
118
|
|
|
113
119
|
async def _get_snapshot_direct(
|
|
114
|
-
self,
|
|
120
|
+
self, viewport_limit: bool = False
|
|
115
121
|
) -> Optional[Union[str, Dict[str, Any]]]:
|
|
116
122
|
r"""Evaluate the snapshot-extraction JS with simple retry logic.
|
|
117
123
|
|
|
@@ -133,7 +139,7 @@ class PageSnapshot:
|
|
|
133
139
|
retries: int = 3
|
|
134
140
|
while retries > 0:
|
|
135
141
|
try:
|
|
136
|
-
return await self.page.evaluate(js_code)
|
|
142
|
+
return await self.page.evaluate(js_code, viewport_limit)
|
|
137
143
|
except Exception as e:
|
|
138
144
|
msg = str(e)
|
|
139
145
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
(() => {
|
|
1
|
+
((viewport_limit = false) => {
|
|
2
2
|
// Unified analyzer that combines visual and structural analysis
|
|
3
3
|
// Preserves complete snapshot.js logic while adding visual coordinate information
|
|
4
4
|
|
|
@@ -406,6 +406,11 @@
|
|
|
406
406
|
if (tagName === 'header') return 'banner';
|
|
407
407
|
if (tagName === 'footer') return 'contentinfo';
|
|
408
408
|
if (tagName === 'fieldset') return 'group';
|
|
409
|
+
|
|
410
|
+
// Enhanced role mappings for table elements
|
|
411
|
+
if (tagName === 'table') return 'table';
|
|
412
|
+
if (tagName === 'tr') return 'row';
|
|
413
|
+
if (tagName === 'td' || tagName === 'th') return 'cell';
|
|
409
414
|
|
|
410
415
|
return 'generic';
|
|
411
416
|
}
|
|
@@ -484,6 +489,9 @@
|
|
|
484
489
|
|
|
485
490
|
// Add a heuristic to ignore code-like text that might be in the DOM
|
|
486
491
|
if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
|
|
487
495
|
return text;
|
|
488
496
|
}
|
|
489
497
|
|
|
@@ -578,6 +586,8 @@
|
|
|
578
586
|
const level = getAriaLevel(element);
|
|
579
587
|
if (level > 0) node.level = level;
|
|
580
588
|
|
|
589
|
+
|
|
590
|
+
|
|
581
591
|
return node;
|
|
582
592
|
}
|
|
583
593
|
|
|
@@ -725,6 +735,9 @@
|
|
|
725
735
|
if (isRedundantWrapper) {
|
|
726
736
|
return node.children;
|
|
727
737
|
}
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
|
|
728
741
|
return [node];
|
|
729
742
|
}
|
|
730
743
|
|
|
@@ -815,6 +828,23 @@
|
|
|
815
828
|
|
|
816
829
|
// === Visual analysis functions from page_script.js ===
|
|
817
830
|
|
|
831
|
+
// Check if element is within the current viewport
|
|
832
|
+
function isInViewport(element) {
|
|
833
|
+
if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;
|
|
834
|
+
|
|
835
|
+
try {
|
|
836
|
+
const rect = element.getBoundingClientRect();
|
|
837
|
+
return (
|
|
838
|
+
rect.top >= 0 &&
|
|
839
|
+
rect.left >= 0 &&
|
|
840
|
+
rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
|
|
841
|
+
rect.right <= (window.innerWidth || document.documentElement.clientWidth)
|
|
842
|
+
);
|
|
843
|
+
} catch (e) {
|
|
844
|
+
return false;
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
|
|
818
848
|
// From page_script.js - check if element is topmost at coordinates
|
|
819
849
|
function isTopmost(element, x, y) {
|
|
820
850
|
let hit = document.elementFromPoint(x, y);
|
|
@@ -855,10 +885,21 @@
|
|
|
855
885
|
|
|
856
886
|
// === Unified analysis function ===
|
|
857
887
|
|
|
858
|
-
function collectElementsFromTree(node, elementsMap) {
|
|
888
|
+
function collectElementsFromTree(node, elementsMap, viewportLimitEnabled = false) {
|
|
859
889
|
if (typeof node === 'string') return;
|
|
860
890
|
|
|
861
891
|
if (node.element && node.ref) {
|
|
892
|
+
// If viewport_limit is enabled, only include elements that are in the viewport
|
|
893
|
+
if (viewportLimitEnabled && !isInViewport(node.element)) {
|
|
894
|
+
// Skip this element but still process its children
|
|
895
|
+
if (node.children) {
|
|
896
|
+
for (const child of node.children) {
|
|
897
|
+
collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
return;
|
|
901
|
+
}
|
|
902
|
+
|
|
862
903
|
// Get visual coordinates for this element
|
|
863
904
|
const coordinates = getElementCoordinates(node.element);
|
|
864
905
|
|
|
@@ -891,7 +932,7 @@
|
|
|
891
932
|
// Recursively process children
|
|
892
933
|
if (node.children) {
|
|
893
934
|
for (const child of node.children) {
|
|
894
|
-
collectElementsFromTree(child, elementsMap);
|
|
935
|
+
collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
|
|
895
936
|
}
|
|
896
937
|
}
|
|
897
938
|
}
|
|
@@ -931,7 +972,7 @@
|
|
|
931
972
|
[tree] = normalizeTree(tree);
|
|
932
973
|
|
|
933
974
|
const elementsMap = {};
|
|
934
|
-
collectElementsFromTree(tree, elementsMap);
|
|
975
|
+
collectElementsFromTree(tree, elementsMap, viewport_limit);
|
|
935
976
|
|
|
936
977
|
// Verify uniqueness of aria-ref attributes (debugging aid)
|
|
937
978
|
const ariaRefCounts = {};
|