camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -13,22 +13,39 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
# =========
|
|
15
15
|
|
|
16
|
+
import contextlib
|
|
16
17
|
import time
|
|
17
|
-
from typing import
|
|
18
|
+
from typing import (
|
|
19
|
+
Any,
|
|
20
|
+
Callable,
|
|
21
|
+
ClassVar,
|
|
22
|
+
Dict,
|
|
23
|
+
List,
|
|
24
|
+
Optional,
|
|
25
|
+
TypedDict,
|
|
26
|
+
cast,
|
|
27
|
+
)
|
|
18
28
|
|
|
19
29
|
from camel.logger import get_logger
|
|
20
30
|
from camel.messages import BaseMessage
|
|
21
|
-
from camel.models import BaseModelBackend
|
|
22
31
|
from camel.toolkits.base import BaseToolkit, RegisteredAgentToolkit
|
|
23
32
|
from camel.toolkits.function_tool import FunctionTool
|
|
24
33
|
from camel.utils.commons import dependencies_required
|
|
25
34
|
|
|
26
35
|
from .config_loader import ConfigLoader
|
|
27
|
-
from .ws_wrapper import WebSocketBrowserWrapper
|
|
36
|
+
from .ws_wrapper import WebSocketBrowserWrapper, high_level_action
|
|
28
37
|
|
|
29
38
|
logger = get_logger(__name__)
|
|
30
39
|
|
|
31
40
|
|
|
41
|
+
class SheetCell(TypedDict):
|
|
42
|
+
"""Type definition for a sheet cell input."""
|
|
43
|
+
|
|
44
|
+
row: int
|
|
45
|
+
col: int
|
|
46
|
+
text: str
|
|
47
|
+
|
|
48
|
+
|
|
32
49
|
class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
33
50
|
r"""A hybrid browser toolkit that combines non-visual, DOM-based browser
|
|
34
51
|
automation with visual, screenshot-based capabilities.
|
|
@@ -37,7 +54,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
37
54
|
_snapshotForAI functionality for enhanced AI integration.
|
|
38
55
|
"""
|
|
39
56
|
|
|
40
|
-
# Default tool list - core browser functionality
|
|
41
57
|
DEFAULT_TOOLS: ClassVar[List[str]] = [
|
|
42
58
|
"browser_open",
|
|
43
59
|
"browser_close",
|
|
@@ -49,7 +65,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
49
65
|
"browser_switch_tab",
|
|
50
66
|
]
|
|
51
67
|
|
|
52
|
-
# All available tools
|
|
53
68
|
ALL_TOOLS: ClassVar[List[str]] = [
|
|
54
69
|
"browser_open",
|
|
55
70
|
"browser_close",
|
|
@@ -58,17 +73,22 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
58
73
|
"browser_forward",
|
|
59
74
|
"browser_get_page_snapshot",
|
|
60
75
|
"browser_get_som_screenshot",
|
|
61
|
-
"browser_get_page_links",
|
|
62
76
|
"browser_click",
|
|
63
77
|
"browser_type",
|
|
64
78
|
"browser_select",
|
|
65
79
|
"browser_scroll",
|
|
66
80
|
"browser_enter",
|
|
81
|
+
"browser_mouse_control",
|
|
82
|
+
"browser_mouse_drag",
|
|
83
|
+
"browser_press_key",
|
|
67
84
|
"browser_wait_user",
|
|
68
|
-
"browser_solve_task",
|
|
69
85
|
"browser_switch_tab",
|
|
70
86
|
"browser_close_tab",
|
|
71
87
|
"browser_get_tab_info",
|
|
88
|
+
"browser_console_view",
|
|
89
|
+
"browser_console_exec",
|
|
90
|
+
"browser_sheet_input",
|
|
91
|
+
"browser_sheet_read",
|
|
72
92
|
]
|
|
73
93
|
|
|
74
94
|
def __init__(
|
|
@@ -77,12 +97,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
77
97
|
headless: bool = True,
|
|
78
98
|
user_data_dir: Optional[str] = None,
|
|
79
99
|
stealth: bool = False,
|
|
80
|
-
|
|
81
|
-
cache_dir: str = "tmp/",
|
|
100
|
+
cache_dir: Optional[str] = None,
|
|
82
101
|
enabled_tools: Optional[List[str]] = None,
|
|
83
102
|
browser_log_to_file: bool = False,
|
|
103
|
+
log_dir: Optional[str] = None,
|
|
84
104
|
session_id: Optional[str] = None,
|
|
85
|
-
default_start_url: str =
|
|
105
|
+
default_start_url: Optional[str] = None,
|
|
86
106
|
default_timeout: Optional[int] = None,
|
|
87
107
|
short_timeout: Optional[int] = None,
|
|
88
108
|
navigation_timeout: Optional[int] = None,
|
|
@@ -93,6 +113,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
93
113
|
viewport_limit: bool = False,
|
|
94
114
|
connect_over_cdp: bool = False,
|
|
95
115
|
cdp_url: Optional[str] = None,
|
|
116
|
+
cdp_keep_current_page: bool = False,
|
|
117
|
+
full_visual_mode: bool = False,
|
|
96
118
|
) -> None:
|
|
97
119
|
r"""Initialize the HybridBrowserToolkit.
|
|
98
120
|
|
|
@@ -103,13 +125,13 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
103
125
|
persistence. Defaults to None.
|
|
104
126
|
stealth (bool): Whether to enable stealth mode. Defaults to
|
|
105
127
|
False.
|
|
106
|
-
web_agent_model (Optional[BaseModelBackend]): Model for web
|
|
107
|
-
agent operations. Defaults to None.
|
|
108
128
|
cache_dir (str): Directory for caching. Defaults to "tmp/".
|
|
109
129
|
enabled_tools (Optional[List[str]]): List of enabled tools.
|
|
110
130
|
Defaults to None.
|
|
111
131
|
browser_log_to_file (bool): Whether to log browser actions to
|
|
112
132
|
file. Defaults to False.
|
|
133
|
+
log_dir (Optional[str]): Custom directory path for log files.
|
|
134
|
+
If None, defaults to "browser_log". Defaults to None.
|
|
113
135
|
session_id (Optional[str]): Session identifier. Defaults to None.
|
|
114
136
|
default_start_url (str): Default URL to start with. Defaults
|
|
115
137
|
to "https://google.com/".
|
|
@@ -138,11 +160,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
138
160
|
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
|
|
139
161
|
connection (e.g., 'ws://localhost:9222/devtools/browser/...').
|
|
140
162
|
Required when connect_over_cdp is True. Defaults to None.
|
|
163
|
+
cdp_keep_current_page (bool): When True and using CDP mode,
|
|
164
|
+
won't create new pages but use the existing one. Defaults to False.
|
|
165
|
+
full_visual_mode (bool): When True, browser actions like click,
|
|
166
|
+
browser_open, visit_page, etc. will not return snapshots.
|
|
167
|
+
Defaults to False.
|
|
141
168
|
"""
|
|
142
169
|
super().__init__()
|
|
143
170
|
RegisteredAgentToolkit.__init__(self)
|
|
144
171
|
|
|
145
|
-
# Initialize configuration loader
|
|
146
172
|
self.config_loader = ConfigLoader.from_kwargs(
|
|
147
173
|
headless=headless,
|
|
148
174
|
user_data_dir=user_data_dir,
|
|
@@ -158,27 +184,39 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
158
184
|
viewport_limit=viewport_limit,
|
|
159
185
|
cache_dir=cache_dir,
|
|
160
186
|
browser_log_to_file=browser_log_to_file,
|
|
187
|
+
log_dir=log_dir,
|
|
161
188
|
session_id=session_id,
|
|
162
189
|
enabled_tools=enabled_tools,
|
|
163
190
|
connect_over_cdp=connect_over_cdp,
|
|
164
191
|
cdp_url=cdp_url,
|
|
192
|
+
cdp_keep_current_page=cdp_keep_current_page,
|
|
193
|
+
full_visual_mode=full_visual_mode,
|
|
165
194
|
)
|
|
166
195
|
|
|
167
|
-
# Legacy attribute access for backward compatibility
|
|
168
196
|
browser_config = self.config_loader.get_browser_config()
|
|
169
197
|
toolkit_config = self.config_loader.get_toolkit_config()
|
|
170
198
|
|
|
199
|
+
if (
|
|
200
|
+
browser_config.cdp_keep_current_page
|
|
201
|
+
and default_start_url is not None
|
|
202
|
+
):
|
|
203
|
+
raise ValueError(
|
|
204
|
+
"Cannot use default_start_url with "
|
|
205
|
+
"cdp_keep_current_page=True. When cdp_keep_current_page "
|
|
206
|
+
"is True, the browser will keep the current page and not "
|
|
207
|
+
"navigate to any URL."
|
|
208
|
+
)
|
|
209
|
+
|
|
171
210
|
self._headless = browser_config.headless
|
|
172
211
|
self._user_data_dir = browser_config.user_data_dir
|
|
173
212
|
self._stealth = browser_config.stealth
|
|
174
|
-
self._web_agent_model = web_agent_model
|
|
175
213
|
self._cache_dir = toolkit_config.cache_dir
|
|
176
214
|
self._browser_log_to_file = toolkit_config.browser_log_to_file
|
|
177
215
|
self._default_start_url = browser_config.default_start_url
|
|
178
216
|
self._session_id = toolkit_config.session_id or "default"
|
|
179
217
|
self._viewport_limit = browser_config.viewport_limit
|
|
218
|
+
self._full_visual_mode = browser_config.full_visual_mode
|
|
180
219
|
|
|
181
|
-
# Store timeout configuration for backward compatibility
|
|
182
220
|
self._default_timeout = browser_config.default_timeout
|
|
183
221
|
self._short_timeout = browser_config.short_timeout
|
|
184
222
|
self._navigation_timeout = browser_config.navigation_timeout
|
|
@@ -189,11 +227,9 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
189
227
|
browser_config.dom_content_loaded_timeout
|
|
190
228
|
)
|
|
191
229
|
|
|
192
|
-
# Configure enabled tools
|
|
193
230
|
if enabled_tools is None:
|
|
194
231
|
self.enabled_tools = self.DEFAULT_TOOLS.copy()
|
|
195
232
|
else:
|
|
196
|
-
# Validate enabled tools
|
|
197
233
|
invalid_tools = [
|
|
198
234
|
tool for tool in enabled_tools if tool not in self.ALL_TOOLS
|
|
199
235
|
]
|
|
@@ -206,7 +242,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
206
242
|
|
|
207
243
|
logger.info(f"Enabled tools: {self.enabled_tools}")
|
|
208
244
|
|
|
209
|
-
# Initialize WebSocket wrapper
|
|
210
245
|
self._ws_wrapper: Optional[WebSocketBrowserWrapper] = None
|
|
211
246
|
self._ws_config = self.config_loader.to_ws_config()
|
|
212
247
|
|
|
@@ -233,13 +268,29 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
233
268
|
|
|
234
269
|
import asyncio
|
|
235
270
|
|
|
271
|
+
is_cdp = (
|
|
272
|
+
self._ws_config.get('connectOverCdp', False)
|
|
273
|
+
if hasattr(self, '_ws_config')
|
|
274
|
+
else False
|
|
275
|
+
)
|
|
276
|
+
|
|
236
277
|
try:
|
|
237
278
|
loop = asyncio.get_event_loop()
|
|
238
279
|
if not loop.is_closed() and not loop.is_running():
|
|
239
280
|
try:
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
281
|
+
if is_cdp:
|
|
282
|
+
# CDP: disconnect only
|
|
283
|
+
loop.run_until_complete(
|
|
284
|
+
asyncio.wait_for(
|
|
285
|
+
self.disconnect_websocket(), timeout=2.0
|
|
286
|
+
)
|
|
287
|
+
)
|
|
288
|
+
else:
|
|
289
|
+
loop.run_until_complete(
|
|
290
|
+
asyncio.wait_for(
|
|
291
|
+
self.browser_close(), timeout=2.0
|
|
292
|
+
)
|
|
293
|
+
)
|
|
243
294
|
except asyncio.TimeoutError:
|
|
244
295
|
pass
|
|
245
296
|
except (RuntimeError, ImportError):
|
|
@@ -247,23 +298,11 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
247
298
|
except Exception:
|
|
248
299
|
pass
|
|
249
300
|
|
|
250
|
-
@property
|
|
251
|
-
def web_agent_model(self) -> Optional[BaseModelBackend]:
|
|
252
|
-
"""Get the web agent model."""
|
|
253
|
-
return self._web_agent_model
|
|
254
|
-
|
|
255
|
-
@web_agent_model.setter
|
|
256
|
-
def web_agent_model(self, value: Optional[BaseModelBackend]) -> None:
|
|
257
|
-
"""Set the web agent model."""
|
|
258
|
-
self._web_agent_model = value
|
|
259
|
-
|
|
260
301
|
@property
|
|
261
302
|
def cache_dir(self) -> str:
|
|
262
303
|
"""Get the cache directory."""
|
|
263
304
|
return self._cache_dir
|
|
264
305
|
|
|
265
|
-
# Public API Methods
|
|
266
|
-
|
|
267
306
|
async def browser_open(self) -> Dict[str, Any]:
|
|
268
307
|
r"""Starts a new browser session. This must be the first browser
|
|
269
308
|
action.
|
|
@@ -284,7 +323,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
284
323
|
ws_wrapper = await self._get_ws_wrapper()
|
|
285
324
|
result = await ws_wrapper.open_browser(self._default_start_url)
|
|
286
325
|
|
|
287
|
-
# Add tab information
|
|
288
326
|
tab_info = await ws_wrapper.get_tab_info()
|
|
289
327
|
result.update(
|
|
290
328
|
{
|
|
@@ -329,6 +367,31 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
329
367
|
logger.error(f"Failed to close browser: {e}")
|
|
330
368
|
return f"Error closing browser: {e}"
|
|
331
369
|
|
|
370
|
+
async def disconnect_websocket(self) -> str:
|
|
371
|
+
r"""Disconnects the WebSocket connection without closing the browser.
|
|
372
|
+
|
|
373
|
+
This is useful when using CDP mode where the browser should
|
|
374
|
+
remain open.
|
|
375
|
+
|
|
376
|
+
Returns:
|
|
377
|
+
str: A confirmation message.
|
|
378
|
+
"""
|
|
379
|
+
try:
|
|
380
|
+
if self._ws_wrapper:
|
|
381
|
+
is_cdp = self._ws_config.get('connectOverCdp', False)
|
|
382
|
+
|
|
383
|
+
if is_cdp:
|
|
384
|
+
# CDP: disconnect only
|
|
385
|
+
await self._ws_wrapper.disconnect_only()
|
|
386
|
+
else:
|
|
387
|
+
await self._ws_wrapper.stop()
|
|
388
|
+
|
|
389
|
+
self._ws_wrapper = None
|
|
390
|
+
return "WebSocket disconnected."
|
|
391
|
+
except Exception as e:
|
|
392
|
+
logger.error(f"Failed to disconnect WebSocket: {e}")
|
|
393
|
+
return f"Error disconnecting WebSocket: {e}"
|
|
394
|
+
|
|
332
395
|
async def browser_visit_page(self, url: str) -> Dict[str, Any]:
|
|
333
396
|
r"""Opens a URL in a new browser tab and switches to it.
|
|
334
397
|
|
|
@@ -348,7 +411,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
348
411
|
ws_wrapper = await self._get_ws_wrapper()
|
|
349
412
|
result = await ws_wrapper.visit_page(url)
|
|
350
413
|
|
|
351
|
-
# Add tab information
|
|
352
414
|
tab_info = await ws_wrapper.get_tab_info()
|
|
353
415
|
result.update(
|
|
354
416
|
{
|
|
@@ -394,7 +456,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
394
456
|
ws_wrapper = await self._get_ws_wrapper()
|
|
395
457
|
result = await ws_wrapper.back()
|
|
396
458
|
|
|
397
|
-
# Add tab information
|
|
398
459
|
tab_info = await ws_wrapper.get_tab_info()
|
|
399
460
|
result.update(
|
|
400
461
|
{
|
|
@@ -440,7 +501,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
440
501
|
ws_wrapper = await self._get_ws_wrapper()
|
|
441
502
|
result = await ws_wrapper.forward()
|
|
442
503
|
|
|
443
|
-
# Add tab information
|
|
444
504
|
tab_info = await ws_wrapper.get_tab_info()
|
|
445
505
|
result.update(
|
|
446
506
|
{
|
|
@@ -532,19 +592,14 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
532
592
|
ws_wrapper = await self._get_ws_wrapper()
|
|
533
593
|
result = await ws_wrapper.get_som_screenshot()
|
|
534
594
|
|
|
535
|
-
# Initialize result text
|
|
536
595
|
result_text = result.text
|
|
537
596
|
file_path = None
|
|
538
597
|
|
|
539
|
-
# Save screenshot to cache directory if images are available
|
|
540
598
|
if result.images:
|
|
541
|
-
# Ensure cache directory exists (use absolute path)
|
|
542
599
|
cache_dir = os.path.abspath(self._cache_dir)
|
|
543
600
|
os.makedirs(cache_dir, exist_ok=True)
|
|
544
601
|
|
|
545
|
-
# Get current page URL for filename
|
|
546
602
|
try:
|
|
547
|
-
# Try to get the current page URL from the wrapper
|
|
548
603
|
page_info = await ws_wrapper.get_tab_info()
|
|
549
604
|
current_tab = next(
|
|
550
605
|
(tab for tab in page_info if tab.get('is_current')),
|
|
@@ -554,7 +609,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
554
609
|
except Exception:
|
|
555
610
|
url = 'unknown'
|
|
556
611
|
|
|
557
|
-
# Generate filename
|
|
558
612
|
parsed_url = urllib.parse.urlparse(url)
|
|
559
613
|
url_name = sanitize_filename(
|
|
560
614
|
str(parsed_url.path) or 'homepage', max_length=241
|
|
@@ -564,24 +618,19 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
564
618
|
cache_dir, f"{url_name}_{timestamp}_som.png"
|
|
565
619
|
)
|
|
566
620
|
|
|
567
|
-
# Extract base64 data and save to file
|
|
568
621
|
for _, image_data in enumerate(result.images):
|
|
569
622
|
if image_data.startswith('data:image/png;base64,'):
|
|
570
|
-
# Remove data URL prefix
|
|
571
623
|
base64_data = image_data.split(',', 1)[1]
|
|
572
624
|
|
|
573
|
-
# Decode and save
|
|
574
625
|
image_bytes = base64.b64decode(base64_data)
|
|
575
626
|
with open(file_path, 'wb') as f:
|
|
576
627
|
f.write(image_bytes)
|
|
577
628
|
|
|
578
629
|
logger.info(f"Screenshot saved to: {file_path}")
|
|
579
630
|
|
|
580
|
-
# Update result text to include file path
|
|
581
631
|
result_text += f" (saved to: {file_path})"
|
|
582
632
|
break
|
|
583
633
|
|
|
584
|
-
# Analyze image if requested and agent is registered
|
|
585
634
|
if read_image and file_path:
|
|
586
635
|
if self.agent is None:
|
|
587
636
|
logger.error(
|
|
@@ -596,7 +645,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
596
645
|
)
|
|
597
646
|
else:
|
|
598
647
|
try:
|
|
599
|
-
# Load the image and create a message
|
|
600
648
|
from PIL import Image
|
|
601
649
|
|
|
602
650
|
img = Image.open(file_path)
|
|
@@ -607,7 +655,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
607
655
|
image_list=[img],
|
|
608
656
|
)
|
|
609
657
|
|
|
610
|
-
# Get agent's analysis
|
|
611
658
|
response = await self.agent.astep(message)
|
|
612
659
|
agent_response = response.msgs[0].content
|
|
613
660
|
result_text += f". Agent analysis: {agent_response}"
|
|
@@ -641,24 +688,30 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
641
688
|
ws_wrapper = await self._get_ws_wrapper()
|
|
642
689
|
result = await ws_wrapper.click(ref)
|
|
643
690
|
|
|
644
|
-
# Add tab information
|
|
645
691
|
tab_info = await ws_wrapper.get_tab_info()
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
)
|
|
655
|
-
|
|
692
|
+
|
|
693
|
+
response = {
|
|
694
|
+
"result": result.get("result", ""),
|
|
695
|
+
"snapshot": result.get("snapshot", ""),
|
|
696
|
+
"tabs": tab_info,
|
|
697
|
+
"current_tab": next(
|
|
698
|
+
(
|
|
699
|
+
i
|
|
700
|
+
for i, tab in enumerate(tab_info)
|
|
701
|
+
if tab.get("is_current")
|
|
656
702
|
),
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
703
|
+
0,
|
|
704
|
+
),
|
|
705
|
+
"total_tabs": len(tab_info),
|
|
706
|
+
}
|
|
660
707
|
|
|
661
|
-
|
|
708
|
+
if "newTabId" in result:
|
|
709
|
+
response["newTabId"] = result["newTabId"]
|
|
710
|
+
|
|
711
|
+
if "timing" in result:
|
|
712
|
+
response["timing"] = result["timing"]
|
|
713
|
+
|
|
714
|
+
return response
|
|
662
715
|
except Exception as e:
|
|
663
716
|
logger.error(f"Failed to click element: {e}")
|
|
664
717
|
return {
|
|
@@ -669,12 +722,29 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
669
722
|
"total_tabs": 0,
|
|
670
723
|
}
|
|
671
724
|
|
|
672
|
-
async def browser_type(
|
|
673
|
-
|
|
725
|
+
async def browser_type(
|
|
726
|
+
self,
|
|
727
|
+
*,
|
|
728
|
+
ref: Optional[str] = None,
|
|
729
|
+
text: Optional[str] = None,
|
|
730
|
+
inputs: Optional[List[Dict[str, str]]] = None,
|
|
731
|
+
) -> Dict[str, Any]:
|
|
732
|
+
r"""Types text into one or more input elements on the page.
|
|
733
|
+
|
|
734
|
+
This method supports two modes:
|
|
735
|
+
1. Single input mode (backward compatible): Provide 'ref' and 'text'
|
|
736
|
+
2. Multiple inputs mode: Provide 'inputs' as a list of dictionaries
|
|
737
|
+
with 'ref' and 'text' keys
|
|
674
738
|
|
|
675
739
|
Args:
|
|
676
|
-
ref (str): The `ref` ID of the input element, from a
|
|
677
|
-
|
|
740
|
+
ref (Optional[str]): The `ref` ID of the input element, from a
|
|
741
|
+
snapshot. Required when using single input mode.
|
|
742
|
+
text (Optional[str]): The text to type into the element. Required
|
|
743
|
+
when using single input mode.
|
|
744
|
+
inputs (Optional[List[Dict[str, str]]]): List of dictionaries,
|
|
745
|
+
each containing 'ref' and 'text' keys for typing into multiple
|
|
746
|
+
elements. Example: [{'ref': '1', 'text': 'username'},
|
|
747
|
+
{'ref': '2', 'text': 'password'}]
|
|
678
748
|
|
|
679
749
|
Returns:
|
|
680
750
|
Dict[str, Any]: A dictionary with the result of the action:
|
|
@@ -684,12 +754,22 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
684
754
|
- "tabs" (List[Dict]): Information about all open tabs.
|
|
685
755
|
- "current_tab" (int): Index of the active tab.
|
|
686
756
|
- "total_tabs" (int): Total number of open tabs.
|
|
757
|
+
- "details" (Dict[str, Any]): When using multiple inputs,
|
|
758
|
+
contains success/error status for each ref.
|
|
687
759
|
"""
|
|
688
760
|
try:
|
|
689
761
|
ws_wrapper = await self._get_ws_wrapper()
|
|
690
|
-
result = await ws_wrapper.type(ref, text)
|
|
691
762
|
|
|
692
|
-
|
|
763
|
+
if ref is not None and text is not None:
|
|
764
|
+
result = await ws_wrapper.type(ref, text)
|
|
765
|
+
elif inputs is not None:
|
|
766
|
+
result = await ws_wrapper.type_multiple(inputs)
|
|
767
|
+
else:
|
|
768
|
+
raise ValueError(
|
|
769
|
+
"Either provide 'ref' and 'text' for single input, "
|
|
770
|
+
"or 'inputs' for multiple inputs"
|
|
771
|
+
)
|
|
772
|
+
|
|
693
773
|
tab_info = await ws_wrapper.get_tab_info()
|
|
694
774
|
result.update(
|
|
695
775
|
{
|
|
@@ -738,7 +818,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
738
818
|
ws_wrapper = await self._get_ws_wrapper()
|
|
739
819
|
result = await ws_wrapper.select(ref, value)
|
|
740
820
|
|
|
741
|
-
# Add tab information
|
|
742
821
|
tab_info = await ws_wrapper.get_tab_info()
|
|
743
822
|
result.update(
|
|
744
823
|
{
|
|
@@ -787,7 +866,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
787
866
|
ws_wrapper = await self._get_ws_wrapper()
|
|
788
867
|
result = await ws_wrapper.scroll(direction, amount)
|
|
789
868
|
|
|
790
|
-
# Add tab information
|
|
791
869
|
tab_info = await ws_wrapper.get_tab_info()
|
|
792
870
|
result.update(
|
|
793
871
|
{
|
|
@@ -835,7 +913,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
835
913
|
ws_wrapper = await self._get_ws_wrapper()
|
|
836
914
|
result = await ws_wrapper.enter()
|
|
837
915
|
|
|
838
|
-
# Add tab information
|
|
839
916
|
tab_info = await ws_wrapper.get_tab_info()
|
|
840
917
|
result.update(
|
|
841
918
|
{
|
|
@@ -863,6 +940,153 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
863
940
|
"total_tabs": 0,
|
|
864
941
|
}
|
|
865
942
|
|
|
943
|
+
async def browser_mouse_control(
|
|
944
|
+
self, *, control: str, x: float, y: float
|
|
945
|
+
) -> Dict[str, Any]:
|
|
946
|
+
r"""Control the mouse to interact with browser with x, y coordinates
|
|
947
|
+
|
|
948
|
+
Args:
|
|
949
|
+
control ([str]): The action to perform: 'click', 'right_click'
|
|
950
|
+
or 'dblclick'.
|
|
951
|
+
x (float): x-coordinate for the control action.
|
|
952
|
+
y (float): y-coordinate for the control action.
|
|
953
|
+
|
|
954
|
+
Returns:
|
|
955
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
956
|
+
- "result" (str): Confirmation of the action.
|
|
957
|
+
- "snapshot" (str): A snapshot of the page after mouse
|
|
958
|
+
control action.
|
|
959
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
960
|
+
- "current_tab" (int): Index of the active tab.
|
|
961
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
962
|
+
"""
|
|
963
|
+
try:
|
|
964
|
+
ws_wrapper = await self._get_ws_wrapper()
|
|
965
|
+
result = await ws_wrapper.mouse_control(control, x, y)
|
|
966
|
+
|
|
967
|
+
tab_info = await ws_wrapper.get_tab_info()
|
|
968
|
+
result.update(
|
|
969
|
+
{
|
|
970
|
+
"tabs": tab_info,
|
|
971
|
+
"current_tab": next(
|
|
972
|
+
(
|
|
973
|
+
i
|
|
974
|
+
for i, tab in enumerate(tab_info)
|
|
975
|
+
if tab.get("is_current")
|
|
976
|
+
),
|
|
977
|
+
0,
|
|
978
|
+
),
|
|
979
|
+
"total_tabs": len(tab_info),
|
|
980
|
+
}
|
|
981
|
+
)
|
|
982
|
+
|
|
983
|
+
return result
|
|
984
|
+
except Exception as e:
|
|
985
|
+
logger.error(f"Failed to control mouse: {e}")
|
|
986
|
+
return {
|
|
987
|
+
"result": f"Error with mouse control: {e}",
|
|
988
|
+
"snapshot": "",
|
|
989
|
+
"tabs": [],
|
|
990
|
+
"current_tab": 0,
|
|
991
|
+
"total_tabs": 0,
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Drag one element onto another, addressing both by `ref` ID.

    Args:
        from_ref (str): The `ref` ID of the element the drag starts on.
        to_ref (str): The `ref` ID of the element the drag ends on.

    Returns:
        Dict[str, Any]: The result reported by the browser wrapper,
            extended with tab details:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            On any failure the error text is placed in "result" and the
            remaining fields are empty/zero.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.mouse_drag(from_ref, to_ref)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        flagged = [
            position
            for position, tab in enumerate(open_tabs)
            if tab.get("is_current")
        ]
        tab_details = {
            "tabs": open_tabs,
            "current_tab": flagged[0] if flagged else 0,
            "total_tabs": len(open_tabs),
        }
        outcome.update(tab_details)
        return outcome
    except Exception as exc:
        logger.error(f"Error with mouse drag and drop: {exc}")
        return {
            "result": f"Error with mouse drag and drop: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1041
|
+
|
|
1042
|
+
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press a key or a key combination in the browser.

    Supports single key press or combination of keys by concatenating
    them with '+' separator.

    Args:
        keys (List[str]): The key, or the list of keys, to press. The
            list is forwarded unchanged to the browser wrapper.

    Returns:
        Dict[str, Any]: The result reported by the browser wrapper,
            extended with tab details:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the page after the key
              press.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            On any failure the error text is placed in "result" and the
            remaining fields are empty/zero.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.press_key(keys)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = next(
            (
                position
                for position, tab in enumerate(open_tabs)
                if tab.get("is_current")
            ),
            0,
        )
        tab_count = len(open_tabs)
        outcome.update(
            {
                "tabs": open_tabs,
                "current_tab": active_index,
                "total_tabs": tab_count,
            }
        )
        return outcome
    except Exception as exc:
        logger.error(f"Failed to press key: {exc}")
        return {
            "result": f"Error with press key: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1089
|
+
|
|
866
1090
|
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
867
1091
|
r"""Switches to a different browser tab using its ID.
|
|
868
1092
|
|
|
@@ -884,7 +1108,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
884
1108
|
ws_wrapper = await self._get_ws_wrapper()
|
|
885
1109
|
result = await ws_wrapper.switch_tab(tab_id)
|
|
886
1110
|
|
|
887
|
-
# Add tab information
|
|
888
1111
|
tab_info = await ws_wrapper.get_tab_info()
|
|
889
1112
|
result.update(
|
|
890
1113
|
{
|
|
@@ -934,7 +1157,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
934
1157
|
ws_wrapper = await self._get_ws_wrapper()
|
|
935
1158
|
result = await ws_wrapper.close_tab(tab_id)
|
|
936
1159
|
|
|
937
|
-
# Add tab information
|
|
938
1160
|
tab_info = await ws_wrapper.get_tab_info()
|
|
939
1161
|
result.update(
|
|
940
1162
|
{
|
|
@@ -1002,6 +1224,582 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1002
1224
|
"total_tabs": 0,
|
|
1003
1225
|
}
|
|
1004
1226
|
|
|
1227
|
+
async def browser_console_view(self) -> Dict[str, Any]:
    r"""Return the console messages of the current page.

    Returns:
        Dict[str, Any]: A dictionary with a single key:
            - "console_messages" (List[Dict]): Messages logged in the
              current page, as returned by the browser wrapper. An empty
              list is returned when the messages cannot be fetched.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        messages = await wrapper.console_view()
    except Exception as exc:
        logger.error(f"Failed to get console view: {exc}")
        return {"console_messages": []}
    else:
        return {"console_messages": messages}
|
|
1244
|
+
|
|
1245
|
+
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Run JavaScript in the console of the current page and return the
    outcome.

    Args:
        code (str): JavaScript code to execute in the browser console.

    Returns:
        Dict[str, Any]: The result reported by the browser wrapper,
            extended with tab details:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the active tab after the
              code was executed.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            On any failure the error text is placed in "result" and the
            remaining fields are empty/zero.
    """
    try:
        wrapper = await self._get_ws_wrapper()
        outcome = await wrapper.console_exec(code)
        open_tabs = await wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        outcome.update(
            {
                "tabs": open_tabs,
                "current_tab": active_index,
                "total_tabs": len(open_tabs),
            }
        )
        return outcome
    except Exception as exc:
        logger.error(f"Failed to execute javascript in console: {exc}")
        return {
            "result": f"Error in code execution: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1291
|
+
|
|
1292
|
+
@high_level_action
async def browser_sheet_input(
    self, *, cells: List[SheetCell]
) -> Dict[str, Any]:
    r"""Input text into multiple cells in a spreadsheet (e.g., Google
    Sheets).

    Args:
        cells (List[Dict[str, Any]]): List of cells to input, each
            containing:
            - "row" (int): Row index (0-based). Row 0 = first row,
              Row 1 = second row, etc.
            - "col" (int): Column index (0-based). Col 0 = Column A,
              Col 1 = Column B, etc. A column label ("A", "AB", ...) or
              a numeric string ("2") is also accepted and converted to
              the 0-based index.
            - "text" (str): Text to input into the cell

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action with details.
            - "content" (str): The updated spreadsheet content (auto-read
              after input).
            - "snapshot" (str): Always empty string (sheet tools don't
              return snapshots).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            Invalid cell coordinates (negative indices, malformed column
            labels) are reported through "result" like any other error.

    Example:
        >>> cells = [
        ...     {"row": 0, "col": 0, "text": "Name"},
        ...     {"row": 0, "col": 1, "text": "Age"},
        ...     {"row": 1, "col": 0, "text": "Alice"},
        ...     {"row": 1, "col": 1, "text": "30"},
        ... ]
    """
    try:
        import platform

        ws_wrapper = await self._get_ws_wrapper()
        system = platform.system()

        # Normalize cells: convert column labels to indices if needed
        normalized_cells = []
        for cell in cells:
            normalized_cell = cell.copy()

            col = cell.get("col", 0)
            if isinstance(col, str):
                label = col.strip().upper()
                if label.isdecimal():
                    # Numeric string such as "2": already a 0-based index.
                    col_index = int(label)
                elif label.isascii() and label.isalpha():
                    # Convert A->0, B->1, ..., Z->25, AA->26, AB->27, etc.
                    col_index = 0
                    for char in label:
                        col_index = col_index * 26 + (
                            ord(char) - ord('A') + 1
                        )
                    col_index -= 1
                else:
                    # Empty or mixed labels would otherwise produce a
                    # meaningless (possibly negative) index.
                    raise ValueError(f"Invalid column reference: {col!r}")
            else:
                col_index = int(col)

            # Row is always used as-is (should be 0-based integer)
            row_index = int(cell.get("row", 0))

            # Navigation is relative to A1; a negative target would clamp
            # at the sheet edge and shift every following cell.
            if row_index < 0 or col_index < 0:
                raise ValueError(
                    f"Cell indices must be non-negative, got "
                    f"row={row_index}, col={col_index}"
                )

            normalized_cell["col"] = col_index
            normalized_cell["row"] = row_index
            normalized_cell["text"] = str(cell.get("text", ""))
            normalized_cells.append(normalized_cell)

        # Perform batch input
        input_result = await self._sheet_input_batch_js(
            normalized_cells, ws_wrapper, system
        )

        # Read sheet content after input
        try:
            read_result = await self.browser_sheet_read()
            return {
                "result": input_result["result"],
                "content": read_result.get("content", ""),
                "snapshot": "",
                "tabs": input_result.get("tabs", []),
                "current_tab": input_result.get("current_tab", 0),
                "total_tabs": input_result.get("total_tabs", 0),
            }
        except Exception as read_error:
            # The input itself went through; only the follow-up read
            # failed, so still return the input result.
            logger.warning(f"Failed to auto-read sheet: {read_error}")
            input_result["snapshot"] = ""
            return input_result

    except Exception as e:
        logger.error(f"Failed to input to sheet: {e}")
        return {
            "result": f"Error inputting to sheet: {e}",
            "content": "",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1386
|
+
|
|
1387
|
+
async def _sheet_input_batch_js(
    self,
    cells: List[SheetCell],
    ws_wrapper: Any,
    system: str,
) -> Dict[str, Any]:
    r"""Write cells via one batched keyboard command, navigating with
    relative arrow-key movement.

    The complete list of key presses, typed text and waits is built
    first and then sent in a single 'batch_keyboard_input' command.

    Args:
        cells (List[SheetCell]): Cells to write; "row", "col" and "text"
            are read from each entry (missing values default to 0, 0
            and "").
        ws_wrapper (Any): Browser wrapper used to send the command and to
            query tab information.
        system (str): Platform name; "Darwin" selects the Meta modifier,
            any other value selects Control.

    Returns:
        Dict[str, Any]: "result", "snapshot" (always empty), "tabs",
            "current_tab" and "total_tabs". On failure "result" carries
            the error text and the tab fields are empty/zero.
    """

    def press(*key_names: str) -> Dict[str, Any]:
        return {"type": "press", "keys": list(key_names)}

    def pause(milliseconds: int) -> Dict[str, Any]:
        return {"type": "wait", "delay": milliseconds}

    # Go to A1 to ensure we start from a known position
    home_modifier = "Meta" if system == "Darwin" else "Control"
    operations: List[Dict[str, Any]] = [
        press(home_modifier, "Home"),
        pause(310),
    ]

    # Tracked cursor position, starting at (0, 0)
    cursor_row, cursor_col = 0, 0

    for cell in cells:
        target_row = cell.get("row", 0)
        target_col = cell.get("col", 0)
        text = cell.get("text", "")

        row_diff = target_row - cursor_row
        col_diff = target_col - cursor_col

        # Vertical movement first, one arrow press per row
        vertical_key = "ArrowDown" if row_diff > 0 else "ArrowUp"
        for _ in range(abs(row_diff)):
            operations.append(press(vertical_key))
            operations.append(pause(50))

        # Then horizontal movement, one arrow press per column
        horizontal_key = "ArrowRight" if col_diff > 0 else "ArrowLeft"
        for _ in range(abs(col_diff)):
            operations.append(press(horizontal_key))
            operations.append(pause(50))

        # Extra settle time only when the cursor actually moved
        if row_diff or col_diff:
            operations.append(pause(100))

        # Clear the cell, then type the new value (if any)
        operations.extend([press("Delete"), pause(120)])
        if text:
            operations.append({"type": "type", "text": text, "delay": 0})
            operations.append(pause(120))

        # Press Enter to confirm
        operations.extend([press("Enter"), pause(130)])

        # After Enter the cursor is assumed to sit one row below
        cursor_row, cursor_col = target_row + 1, target_col

    try:
        await ws_wrapper._send_command(
            'batch_keyboard_input',
            {'operations': operations, 'skipStabilityWait': True},
        )
        open_tabs = await ws_wrapper.get_tab_info()

        # Index of the first tab flagged as current; 0 when none is.
        active_index = 0
        for position, tab in enumerate(open_tabs):
            if tab.get("is_current"):
                active_index = position
                break

        return {
            "result": f"Successfully input to {len(cells)} cells",
            "snapshot": "",
            "tabs": open_tabs,
            "current_tab": active_index,
            "total_tabs": len(open_tabs),
        }
    except Exception as exc:
        logger.error(f"Batch keyboard execution failed: {exc}")
        return {
            "result": f"Error in batch keyboard execution: {exc}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
|
|
1494
|
+
|
|
1495
|
+
def _trim_sheet_content(self, content: str) -> str:
|
|
1496
|
+
"""Trim sheet content and add row/column labels.
|
|
1497
|
+
|
|
1498
|
+
Remove all empty rows and columns, then add:
|
|
1499
|
+
- Column headers: A, B, C, D...
|
|
1500
|
+
- Row numbers: 0, 1, 2, 3...
|
|
1501
|
+
|
|
1502
|
+
Args:
|
|
1503
|
+
content (str): Raw sheet content with tabs and newlines.
|
|
1504
|
+
|
|
1505
|
+
Returns:
|
|
1506
|
+
str: Trimmed content with row/column labels.
|
|
1507
|
+
"""
|
|
1508
|
+
if not content or not content.strip():
|
|
1509
|
+
return ""
|
|
1510
|
+
|
|
1511
|
+
# Split into rows and parse into 2D array
|
|
1512
|
+
rows = content.split('\n')
|
|
1513
|
+
grid: List[List[str]] = []
|
|
1514
|
+
max_cols = 0
|
|
1515
|
+
for row_str in rows:
|
|
1516
|
+
cells = row_str.split('\t')
|
|
1517
|
+
grid.append(cells)
|
|
1518
|
+
max_cols = max(max_cols, len(cells))
|
|
1519
|
+
|
|
1520
|
+
# Pad rows to same length
|
|
1521
|
+
for row_list in grid:
|
|
1522
|
+
while len(row_list) < max_cols:
|
|
1523
|
+
row_list.append('')
|
|
1524
|
+
|
|
1525
|
+
if not grid:
|
|
1526
|
+
return ""
|
|
1527
|
+
|
|
1528
|
+
# Find non-empty rows and columns (keep original indices)
|
|
1529
|
+
non_empty_rows = []
|
|
1530
|
+
for i, row_cells in enumerate(grid):
|
|
1531
|
+
if any(cell.strip() for cell in row_cells):
|
|
1532
|
+
non_empty_rows.append(i)
|
|
1533
|
+
|
|
1534
|
+
non_empty_cols = []
|
|
1535
|
+
for j in range(max_cols):
|
|
1536
|
+
if any(grid[i][j].strip() for i in range(len(grid))):
|
|
1537
|
+
non_empty_cols.append(j)
|
|
1538
|
+
|
|
1539
|
+
# If no content found
|
|
1540
|
+
if not non_empty_rows or not non_empty_cols:
|
|
1541
|
+
return ""
|
|
1542
|
+
|
|
1543
|
+
# Extract non-empty rows and columns
|
|
1544
|
+
filtered_grid = []
|
|
1545
|
+
for i in non_empty_rows:
|
|
1546
|
+
filtered_row = [grid[i][j] for j in non_empty_cols]
|
|
1547
|
+
filtered_grid.append(filtered_row)
|
|
1548
|
+
|
|
1549
|
+
# Generate column labels using original column indices
|
|
1550
|
+
def col_label(index):
|
|
1551
|
+
label = ""
|
|
1552
|
+
while True:
|
|
1553
|
+
label = chr(65 + (index % 26)) + label
|
|
1554
|
+
index = index // 26
|
|
1555
|
+
if index == 0:
|
|
1556
|
+
break
|
|
1557
|
+
index -= 1
|
|
1558
|
+
return label
|
|
1559
|
+
|
|
1560
|
+
col_headers = [col_label(j) for j in non_empty_cols]
|
|
1561
|
+
|
|
1562
|
+
# Add column headers as first row
|
|
1563
|
+
result_rows = ['\t'.join(['', *col_headers])]
|
|
1564
|
+
|
|
1565
|
+
# Add data rows with original row numbers (0-based)
|
|
1566
|
+
for row_idx, row_data in zip(non_empty_rows, filtered_grid):
|
|
1567
|
+
result_rows.append('\t'.join([str(row_idx), *row_data]))
|
|
1568
|
+
|
|
1569
|
+
return '\n'.join(result_rows)
|
|
1570
|
+
|
|
1571
|
+
@high_level_action
async def browser_sheet_read(self) -> Dict[str, Any]:
    r"""Read content from a spreadsheet.

    This tool reads spreadsheet content and returns it in a structured
    format with row/column labels. Empty rows and columns are
    automatically removed.

    The content is obtained by installing a temporary 'copy' event
    listener in the page, sending select-all + copy key presses, and
    reading back the text the page tried to place on the clipboard.

    Output format:
    - First row: Column labels (A, B, C, ..., Z, AA, AB, ...)
    - First column: Row numbers (0, 1, 2, 3, ...) - 0-based
    - Labels show ORIGINAL positions in the spreadsheet (before removing
      empty rows/columns)

    Row/column indices match browser_sheet_input directly:
    - Row label "0" in output = row index 0 in browser_sheet_input
    - Column label "A" in output = col index 0 in browser_sheet_input
    - Column label "C" in output = col index 2 in browser_sheet_input

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation message.
            - "content" (str): Tab-separated spreadsheet content with
              row/column labels. Format:
              Line 1: "\tA\tB\tC" (column headers)
              Line 2+: "0\tdata1\tdata2\tdata3" (row number + data)
            - "snapshot" (str): Always empty string (sheet tools don't
              return snapshots).
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.
            On any failure, including failure to reach the browser, the
            error text is placed in "result" and the other fields are
            empty/zero.

    Example output:
            A       B
        0   Name    Age
        1   Alice   30
        2   Bob     25
    """
    import asyncio
    import json
    import platform
    import uuid

    # Use unique ID to avoid conflicts in parallel execution
    request_id = str(uuid.uuid4())
    var_name = f"__sheetCopy_{request_id.replace('-', '_')}"

    # Acquired inside the try block so that a connection failure is
    # reported as an error dict, like every other browser tool.
    ws_wrapper = None

    try:
        ws_wrapper = await self._get_ws_wrapper()

        # Step 1: Setup copy interception with multiple captures
        js_inject = f"""
        window.{var_name} = [];
        let copyCount = 0;
        const copyListener = function(e) {{
            try {{
                // Intercept clipboard data before system clipboard write
                // Capture from Google Sheets' setData call
                const originalSetData = e.clipboardData.setData.bind(
                    e.clipboardData
                );
                let capturedText = '';

                e.clipboardData.setData = function(type, data) {{
                    if (type === 'text/plain') {{
                        capturedText = data;
                    }}
                    // Prevent system clipboard write
                }};

                // Let Google Sheets process event (calls setData)
                // Event propagates and Sheets tries to set clipboard
                setTimeout(() => {{
                    copyCount++;
                    window.{var_name}.push(capturedText);
                }}, 0);

                // Prevent the default browser copy behavior
                e.preventDefault();
            }} catch (err) {{
                console.error(
                    '[SheetRead] Failed to intercept copy data:', err
                );
            }}
        }};

        document.addEventListener('copy', copyListener, true);
        window.{var_name}_removeListener = () => {{
            document.removeEventListener('copy', copyListener, true);
        }};

        'Copy listener installed';
        """
        await ws_wrapper.console_exec(js_inject)

        # Step 2: select-all + copy; only the modifier key differs
        # between macOS and other platforms.
        modifier = "Meta" if platform.system() == "Darwin" else "Control"
        select_all_copy_ops: List[Dict[str, Any]] = [
            {"type": "press", "keys": [modifier, "a"]},
            {"type": "wait", "delay": 100},
            {"type": "press", "keys": [modifier, "c"]},
        ]

        # The sequence is sent twice; the richer of the two captures is
        # kept below.
        for _ in range(2):
            await ws_wrapper._send_command(
                'batch_keyboard_input',
                {
                    'operations': select_all_copy_ops,
                    'skipStabilityWait': True,
                },
            )
            await asyncio.sleep(0.2)

        # Step 3: fetch the captured clipboard payloads from the page
        js_check = f"window.{var_name} || []"
        content_result = await ws_wrapper.console_exec(js_check)
        result_str = content_result.get("result", "[]")

        if isinstance(result_str, list):
            captured_contents = result_str
        elif isinstance(result_str, str):
            if result_str.startswith("Console execution result: "):
                result_str = result_str[
                    len("Console execution result: ") :
                ]
                result_str = result_str.strip()

            try:
                captured_contents = json.loads(result_str)
            except json.JSONDecodeError:
                captured_contents = []
            # Valid JSON that is not an array (string, object, number)
            # carries no captures and must not be indexed below.
            if not isinstance(captured_contents, list):
                captured_contents = []
        else:
            captured_contents = []

        if not captured_contents:
            sheet_content = ""
        elif len(captured_contents) == 1:
            sheet_content = captured_contents[0]
        else:

            def count_non_empty_cells(content):
                if not content:
                    return 0
                return sum(
                    1
                    for line in content.split('\n')
                    for cell in line.split('\t')
                    if cell.strip()
                )

            # Compare the first two captures; the second wins on ties.
            counts = [
                count_non_empty_cells(content)
                for content in captured_contents[:2]
            ]
            best_idx = 0 if counts[0] > counts[1] else 1
            sheet_content = captured_contents[best_idx]

        sheet_content = self._trim_sheet_content(sheet_content)

        tab_info = await ws_wrapper.get_tab_info()

        return {
            "result": "Successfully read spreadsheet content",
            "content": sheet_content,
            "snapshot": "",  # Sheet tools don't return snapshots
            "tabs": tab_info,
            "current_tab": next(
                (
                    i
                    for i, tab in enumerate(tab_info)
                    if tab.get("is_current")
                ),
                0,
            ),
            "total_tabs": len(tab_info),
        }

    except Exception as e:
        logger.error(f"Failed to read sheet: {e}")
        return {
            "result": f"Error reading sheet: {e}",
            "content": "",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
    finally:
        # Best-effort removal of the listener and the temporary globals;
        # skipped when the wrapper was never obtained.
        if ws_wrapper is not None:
            js_cleanup = f"""
            if (window.{var_name}_removeListener) {{
                window.{var_name}_removeListener();
            }}
            delete window.{var_name};
            delete window.{var_name}_removeListener;
            'cleaned'
            """
            with contextlib.suppress(Exception):
                await ws_wrapper.console_exec(js_cleanup)
|
|
1802
|
+
|
|
1005
1803
|
# Additional methods for backward compatibility
|
|
1006
1804
|
async def browser_wait_user(
|
|
1007
1805
|
self, timeout_sec: Optional[float] = None
|
|
@@ -1113,7 +1911,6 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1113
1911
|
headless=self._headless,
|
|
1114
1912
|
user_data_dir=self._user_data_dir,
|
|
1115
1913
|
stealth=self._stealth,
|
|
1116
|
-
web_agent_model=self._web_agent_model,
|
|
1117
1914
|
cache_dir=f"{self._cache_dir.rstrip('/')}_clone_"
|
|
1118
1915
|
f"{new_session_id}/",
|
|
1119
1916
|
enabled_tools=self.enabled_tools.copy(),
|
|
@@ -1127,6 +1924,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1127
1924
|
screenshot_timeout=self._screenshot_timeout,
|
|
1128
1925
|
page_stability_timeout=self._page_stability_timeout,
|
|
1129
1926
|
dom_content_loaded_timeout=self._dom_content_loaded_timeout,
|
|
1927
|
+
viewport_limit=self._viewport_limit,
|
|
1928
|
+
full_visual_mode=self._full_visual_mode,
|
|
1130
1929
|
)
|
|
1131
1930
|
|
|
1132
1931
|
def get_tools(self) -> List[FunctionTool]:
|
|
@@ -1146,25 +1945,22 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
|
1146
1945
|
"browser_select": self.browser_select,
|
|
1147
1946
|
"browser_scroll": self.browser_scroll,
|
|
1148
1947
|
"browser_enter": self.browser_enter,
|
|
1948
|
+
"browser_mouse_control": self.browser_mouse_control,
|
|
1949
|
+
"browser_mouse_drag": self.browser_mouse_drag,
|
|
1950
|
+
"browser_press_key": self.browser_press_key,
|
|
1149
1951
|
"browser_wait_user": self.browser_wait_user,
|
|
1150
1952
|
"browser_switch_tab": self.browser_switch_tab,
|
|
1151
1953
|
"browser_close_tab": self.browser_close_tab,
|
|
1152
1954
|
"browser_get_tab_info": self.browser_get_tab_info,
|
|
1955
|
+
"browser_console_view": self.browser_console_view,
|
|
1956
|
+
"browser_console_exec": self.browser_console_exec,
|
|
1957
|
+
"browser_sheet_input": self.browser_sheet_input,
|
|
1958
|
+
"browser_sheet_read": self.browser_sheet_read,
|
|
1153
1959
|
}
|
|
1154
1960
|
|
|
1155
1961
|
enabled_tools = []
|
|
1156
1962
|
|
|
1157
1963
|
for tool_name in self.enabled_tools:
|
|
1158
|
-
if (
|
|
1159
|
-
tool_name == "browser_solve_task"
|
|
1160
|
-
and self._web_agent_model is None
|
|
1161
|
-
):
|
|
1162
|
-
logger.warning(
|
|
1163
|
-
f"Tool '{tool_name}' is enabled but web_agent_model "
|
|
1164
|
-
f"is not provided. Skipping this tool."
|
|
1165
|
-
)
|
|
1166
|
-
continue
|
|
1167
|
-
|
|
1168
1964
|
if tool_name in tool_map:
|
|
1169
1965
|
tool = FunctionTool(
|
|
1170
1966
|
cast(Callable[..., Any], tool_map[tool_name])
|