camel-ai 0.2.73a2__py3-none-any.whl → 0.2.73a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/storages/vectordb_storages/__init__.py +1 -0
- camel/storages/vectordb_storages/surreal.py +100 -150
- camel/toolkits/excel_toolkit.py +153 -64
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +76 -27
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +215 -132
- camel/toolkits/message_integration.py +174 -47
- camel/toolkits/web_deploy_toolkit.py +207 -12
- {camel_ai-0.2.73a2.dist-info → camel_ai-0.2.73a4.dist-info}/METADATA +3 -1
- {camel_ai-0.2.73a2.dist-info → camel_ai-0.2.73a4.dist-info}/RECORD +12 -12
- {camel_ai-0.2.73a2.dist-info → camel_ai-0.2.73a4.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a2.dist-info → camel_ai-0.2.73a4.dist-info}/licenses/LICENSE +0 -0
|
@@ -23,7 +23,7 @@ from typing import Any, Callable, ClassVar, Dict, List, Optional, cast
|
|
|
23
23
|
|
|
24
24
|
from camel.logger import get_logger
|
|
25
25
|
from camel.models import BaseModelBackend
|
|
26
|
-
from camel.toolkits.base import BaseToolkit
|
|
26
|
+
from camel.toolkits.base import BaseToolkit, RegisteredAgentToolkit
|
|
27
27
|
from camel.toolkits.function_tool import FunctionTool
|
|
28
28
|
from camel.utils import sanitize_filename
|
|
29
29
|
from camel.utils.commons import dependencies_required
|
|
@@ -35,7 +35,7 @@ from .config_loader import ConfigLoader
|
|
|
35
35
|
logger = get_logger(__name__)
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
class HybridBrowserToolkit(BaseToolkit):
|
|
38
|
+
class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
|
|
39
39
|
r"""A hybrid browser toolkit that combines non-visual, DOM-based browser
|
|
40
40
|
automation with visual, screenshot-based capabilities.
|
|
41
41
|
|
|
@@ -48,36 +48,36 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
48
48
|
|
|
49
49
|
# Default tool list - core browser functionality
|
|
50
50
|
DEFAULT_TOOLS: ClassVar[List[str]] = [
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
"
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
"
|
|
51
|
+
"browser_open",
|
|
52
|
+
"browser_close",
|
|
53
|
+
"browser_visit_page",
|
|
54
|
+
"browser_back",
|
|
55
|
+
"browser_forward",
|
|
56
|
+
"browser_click",
|
|
57
|
+
"browser_type",
|
|
58
|
+
"browser_switch_tab",
|
|
59
59
|
]
|
|
60
60
|
|
|
61
61
|
# All available tools
|
|
62
62
|
ALL_TOOLS: ClassVar[List[str]] = [
|
|
63
|
-
"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
"
|
|
68
|
-
"
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
"
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
63
|
+
"browser_open",
|
|
64
|
+
"browser_close",
|
|
65
|
+
"browser_visit_page",
|
|
66
|
+
"browser_back",
|
|
67
|
+
"browser_forward",
|
|
68
|
+
"browser_get_page_snapshot",
|
|
69
|
+
"browser_get_som_screenshot",
|
|
70
|
+
"browser_get_page_links",
|
|
71
|
+
"browser_click",
|
|
72
|
+
"browser_type",
|
|
73
|
+
"browser_select",
|
|
74
|
+
"browser_scroll",
|
|
75
|
+
"browser_enter",
|
|
76
|
+
"browser_wait_user",
|
|
77
|
+
"browser_solve_task",
|
|
78
|
+
"browser_switch_tab",
|
|
79
|
+
"browser_close_tab",
|
|
80
|
+
"browser_get_tab_info",
|
|
81
81
|
]
|
|
82
82
|
|
|
83
83
|
def __init__(
|
|
@@ -109,7 +109,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
109
109
|
browser data like cookies and local storage. Useful for
|
|
110
110
|
maintaining sessions across runs. Defaults to `None` (a
|
|
111
111
|
temporary directory is used).
|
|
112
|
-
stealth (bool): Whether to run the browser in stealth mode to
|
|
112
|
+
stealth (bool): Whether to run the browser in stealth mode to
|
|
113
|
+
avoid
|
|
113
114
|
bot detection. When enabled, hides WebDriver characteristics,
|
|
114
115
|
spoofs navigator properties, and implements various
|
|
115
116
|
anti-detection
|
|
@@ -121,11 +122,15 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
121
122
|
Defaults to `None`.
|
|
122
123
|
cache_dir (str): The directory to store cached files, such as
|
|
123
124
|
screenshots. Defaults to `"tmp/"`.
|
|
124
|
-
enabled_tools (Optional[List[str]]): List of tool names to
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
125
|
+
enabled_tools (Optional[List[str]]): List of tool names to
|
|
126
|
+
enable.
|
|
127
|
+
If None, uses DEFAULT_TOOLS. Available tools: browser_open,
|
|
128
|
+
browser_close, browser_visit_page, browser_back,
|
|
129
|
+
browser_forward, browser_get_page_snapshot,
|
|
130
|
+
browser_get_som_screenshot, browser_get_page_links,
|
|
131
|
+
browser_click, browser_type, browser_select,
|
|
132
|
+
browser_scroll, browser_enter, browser_wait_user,
|
|
133
|
+
browser_solve_task.
|
|
129
134
|
Defaults to `None`.
|
|
130
135
|
browser_log_to_file (bool): Whether to save detailed browser
|
|
131
136
|
action logs to file.
|
|
@@ -134,12 +139,14 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
134
139
|
Logs are saved to an auto-generated timestamped file.
|
|
135
140
|
Defaults to `False`.
|
|
136
141
|
session_id (Optional[str]): A unique identifier for this browser
|
|
137
|
-
session. When multiple HybridBrowserToolkit instances are
|
|
142
|
+
session. When multiple HybridBrowserToolkit instances are
|
|
143
|
+
used
|
|
138
144
|
concurrently, different session IDs prevent them from sharing
|
|
139
145
|
the same browser session and causing conflicts. If None, a
|
|
140
146
|
default session will be used. Defaults to `None`.
|
|
141
147
|
default_start_url (str): The default URL to navigate to when
|
|
142
|
-
open_browser() is called without a start_url parameter or
|
|
148
|
+
open_browser() is called without a start_url parameter or
|
|
149
|
+
with
|
|
143
150
|
None. Defaults to `"https://google.com/"`.
|
|
144
151
|
default_timeout (Optional[int]): Default timeout in milliseconds
|
|
145
152
|
for browser actions. If None, uses environment variable
|
|
@@ -177,6 +184,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
177
184
|
Defaults to `None`.
|
|
178
185
|
"""
|
|
179
186
|
super().__init__()
|
|
187
|
+
RegisteredAgentToolkit.__init__(self)
|
|
180
188
|
self._headless = headless
|
|
181
189
|
self._user_data_dir = user_data_dir
|
|
182
190
|
self._stealth = stealth
|
|
@@ -267,7 +275,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
267
275
|
# Use the session directly - singleton logic is handled in
|
|
268
276
|
# ensure_browser
|
|
269
277
|
self._session = temp_session
|
|
270
|
-
self.
|
|
278
|
+
self._playwright_agent: Optional[PlaywrightLLMAgent] = None
|
|
271
279
|
self._unified_script = self._load_unified_analyzer()
|
|
272
280
|
|
|
273
281
|
@property
|
|
@@ -403,9 +411,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
403
411
|
if self.log_to_console:
|
|
404
412
|
log_msg = f"[BROWSER ACTION] {action_name}"
|
|
405
413
|
if self.enable_timing_logging:
|
|
406
|
-
log_msg +=
|
|
414
|
+
log_msg += (
|
|
415
|
+
f" | Execution: " f"{log_entry['execution_time_ms']}ms"
|
|
416
|
+
)
|
|
407
417
|
if page_load_time is not None and self.enable_page_loading_logging:
|
|
408
|
-
log_msg +=
|
|
418
|
+
log_msg += (
|
|
419
|
+
f" | Page Load: " f"{log_entry['page_load_time_ms']}ms"
|
|
420
|
+
)
|
|
409
421
|
if error:
|
|
410
422
|
log_msg += f" | ERROR: {error}"
|
|
411
423
|
|
|
@@ -570,7 +582,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
570
582
|
# Success - return result
|
|
571
583
|
if attempt > 0:
|
|
572
584
|
logger.debug(
|
|
573
|
-
f"Unified analysis succeeded on attempt
|
|
585
|
+
f"Unified analysis succeeded on attempt "
|
|
586
|
+
f"{attempt + 1}"
|
|
574
587
|
)
|
|
575
588
|
return result
|
|
576
589
|
|
|
@@ -591,7 +604,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
591
604
|
f"{attempt + 1}/{max_retries}): {e}. Retrying..."
|
|
592
605
|
)
|
|
593
606
|
|
|
594
|
-
# Wait a bit for page stability before retrying (
|
|
607
|
+
# Wait a bit for page stability before retrying (
|
|
608
|
+
# optimized)
|
|
595
609
|
try:
|
|
596
610
|
await page.wait_for_load_state(
|
|
597
611
|
'domcontentloaded',
|
|
@@ -774,7 +788,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
774
788
|
}
|
|
775
789
|
except Exception as e:
|
|
776
790
|
logger.warning(
|
|
777
|
-
f"Failed to get tab info from session: {type(e).__name__}:
|
|
791
|
+
f"Failed to get tab info from session: {type(e).__name__}: "
|
|
792
|
+
f"{e}"
|
|
778
793
|
)
|
|
779
794
|
|
|
780
795
|
# Try to get actual tab count from session pages directly
|
|
@@ -816,7 +831,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
816
831
|
f"{len(fallback_session._pages)} total"
|
|
817
832
|
)
|
|
818
833
|
except Exception:
|
|
819
|
-
# Keep the original count if we can't check page
|
|
834
|
+
# Keep the original count if we can't check page
|
|
835
|
+
# status
|
|
820
836
|
pass
|
|
821
837
|
|
|
822
838
|
if actual_tab_count == 0:
|
|
@@ -827,7 +843,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
827
843
|
):
|
|
828
844
|
actual_tab_count = 1
|
|
829
845
|
logger.debug(
|
|
830
|
-
"No pages in list but main page exists,
|
|
846
|
+
"No pages in list but main page exists, "
|
|
847
|
+
"assuming "
|
|
831
848
|
"1 tab"
|
|
832
849
|
)
|
|
833
850
|
else:
|
|
@@ -872,7 +889,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
872
889
|
)
|
|
873
890
|
before_snapshot_time = time.time() - snapshot_start_before
|
|
874
891
|
logger.info(
|
|
875
|
-
f"Pre-action snapshot captured in
|
|
892
|
+
f"Pre-action snapshot captured in "
|
|
893
|
+
f"{before_snapshot_time:.2f}s"
|
|
876
894
|
)
|
|
877
895
|
|
|
878
896
|
# Execute action
|
|
@@ -976,7 +994,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
976
994
|
**tab_info, # Include tab information
|
|
977
995
|
}
|
|
978
996
|
|
|
979
|
-
# If snapshot is unchanged after click, add element details to
|
|
997
|
+
# If snapshot is unchanged after click, add element details to
|
|
998
|
+
# log
|
|
980
999
|
if (
|
|
981
1000
|
snapshot == "snapshot not changed"
|
|
982
1001
|
and action_type == "click"
|
|
@@ -1076,17 +1095,17 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1076
1095
|
"web_agent_model required for high-level task planning"
|
|
1077
1096
|
)
|
|
1078
1097
|
|
|
1079
|
-
if self.
|
|
1080
|
-
self.
|
|
1098
|
+
if self._playwright_agent is None:
|
|
1099
|
+
self._playwright_agent = PlaywrightLLMAgent(
|
|
1081
1100
|
headless=self._headless,
|
|
1082
1101
|
user_data_dir=self._user_data_dir,
|
|
1083
1102
|
model_backend=self._web_agent_model,
|
|
1084
1103
|
)
|
|
1085
|
-
return self.
|
|
1104
|
+
return self._playwright_agent
|
|
1086
1105
|
|
|
1087
1106
|
# Public API Methods
|
|
1088
1107
|
|
|
1089
|
-
async def
|
|
1108
|
+
async def browser_open(self) -> Dict[str, Any]:
|
|
1090
1109
|
r"""Starts a new browser session. This must be the first browser
|
|
1091
1110
|
action.
|
|
1092
1111
|
|
|
@@ -1096,7 +1115,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1096
1115
|
Returns:
|
|
1097
1116
|
Dict[str, Any]: A dictionary with the result of the action:
|
|
1098
1117
|
- "result" (str): Confirmation of the action.
|
|
1099
|
-
- "snapshot" (str): A textual snapshot of interactive
|
|
1118
|
+
- "snapshot" (str): A textual snapshot of interactive
|
|
1119
|
+
elements.
|
|
1100
1120
|
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1101
1121
|
- "current_tab" (int): Index of the active tab.
|
|
1102
1122
|
- "total_tabs" (int): Total number of open tabs.
|
|
@@ -1118,13 +1138,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1118
1138
|
logger.info(f"Navigating to configured default page: {start_url}")
|
|
1119
1139
|
|
|
1120
1140
|
# Use visit_page without creating a new tab
|
|
1121
|
-
result = await self.
|
|
1141
|
+
result = await self.browser_visit_page(url=start_url)
|
|
1122
1142
|
|
|
1123
1143
|
# Log success
|
|
1124
1144
|
if self.enable_action_logging or self.enable_timing_logging:
|
|
1125
1145
|
execution_time = time.time() - action_start
|
|
1126
1146
|
await self._log_action(
|
|
1127
|
-
action_name="
|
|
1147
|
+
action_name="browser_open",
|
|
1128
1148
|
inputs=inputs,
|
|
1129
1149
|
outputs={
|
|
1130
1150
|
"result": "Browser opened and navigated to "
|
|
@@ -1140,7 +1160,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1140
1160
|
if self.enable_action_logging or self.enable_timing_logging:
|
|
1141
1161
|
execution_time = time.time() - action_start
|
|
1142
1162
|
await self._log_action(
|
|
1143
|
-
action_name="
|
|
1163
|
+
action_name="browser_open",
|
|
1144
1164
|
inputs=inputs,
|
|
1145
1165
|
outputs=None,
|
|
1146
1166
|
execution_time=execution_time,
|
|
@@ -1149,7 +1169,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1149
1169
|
raise
|
|
1150
1170
|
|
|
1151
1171
|
@action_logger
|
|
1152
|
-
async def
|
|
1172
|
+
async def browser_close(self) -> str:
|
|
1153
1173
|
r"""Closes the browser session, releasing all resources.
|
|
1154
1174
|
|
|
1155
1175
|
This should be called at the end of a task for cleanup.
|
|
@@ -1157,18 +1177,18 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1157
1177
|
Returns:
|
|
1158
1178
|
str: A confirmation message.
|
|
1159
1179
|
"""
|
|
1160
|
-
if self.
|
|
1180
|
+
if self._playwright_agent is not None:
|
|
1161
1181
|
try:
|
|
1162
|
-
await self.
|
|
1182
|
+
await self._playwright_agent.close()
|
|
1163
1183
|
except Exception:
|
|
1164
1184
|
pass
|
|
1165
|
-
self.
|
|
1185
|
+
self._playwright_agent = None
|
|
1166
1186
|
|
|
1167
1187
|
await self._session.close()
|
|
1168
1188
|
return "Browser session closed."
|
|
1169
1189
|
|
|
1170
1190
|
@action_logger
|
|
1171
|
-
async def
|
|
1191
|
+
async def browser_visit_page(self, url: str) -> Dict[str, Any]:
|
|
1172
1192
|
r"""Opens a URL in a new browser tab and switches to it.
|
|
1173
1193
|
|
|
1174
1194
|
Args:
|
|
@@ -1202,7 +1222,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1202
1222
|
# By default, we want to create a new tab.
|
|
1203
1223
|
should_create_new_tab = True
|
|
1204
1224
|
try:
|
|
1205
|
-
# If the browser has just started with a single "about:blank"
|
|
1225
|
+
# If the browser has just started with a single "about:blank"
|
|
1226
|
+
# tab,
|
|
1206
1227
|
# use that tab instead of creating a new one.
|
|
1207
1228
|
tab_info_data = await self._get_tab_info_for_output()
|
|
1208
1229
|
tabs = tab_info_data.get("tabs", [])
|
|
@@ -1246,7 +1267,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1246
1267
|
return {"result": nav_result, "snapshot": snapshot, **tab_info}
|
|
1247
1268
|
|
|
1248
1269
|
@action_logger
|
|
1249
|
-
async def
|
|
1270
|
+
async def browser_back(self) -> Dict[str, Any]:
|
|
1250
1271
|
r"""Goes back to the previous page in the browser history.
|
|
1251
1272
|
|
|
1252
1273
|
This action simulates using the browser's "back" button in the
|
|
@@ -1271,7 +1292,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1271
1292
|
nav_time = time.time() - nav_start
|
|
1272
1293
|
logger.info(f"Back navigation completed in {nav_time:.2f}s")
|
|
1273
1294
|
|
|
1274
|
-
# Minimal wait for page stability (back navigation is usually
|
|
1295
|
+
# Minimal wait for page stability (back navigation is usually
|
|
1296
|
+
# fast)
|
|
1275
1297
|
import asyncio
|
|
1276
1298
|
|
|
1277
1299
|
await asyncio.sleep(0.2)
|
|
@@ -1310,7 +1332,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1310
1332
|
}
|
|
1311
1333
|
|
|
1312
1334
|
@action_logger
|
|
1313
|
-
async def
|
|
1335
|
+
async def browser_forward(self) -> Dict[str, Any]:
|
|
1314
1336
|
r"""Goes forward to the next page in the browser history.
|
|
1315
1337
|
|
|
1316
1338
|
This action simulates using the browser's "forward" button in the
|
|
@@ -1349,7 +1371,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1349
1371
|
)
|
|
1350
1372
|
snapshot_time = time.time() - snapshot_start
|
|
1351
1373
|
logger.info(
|
|
1352
|
-
f"Forward navigation snapshot captured in
|
|
1374
|
+
f"Forward navigation snapshot captured in "
|
|
1375
|
+
f"{snapshot_time:.2f}s"
|
|
1353
1376
|
)
|
|
1354
1377
|
|
|
1355
1378
|
# Get tab information
|
|
@@ -1375,10 +1398,11 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1375
1398
|
}
|
|
1376
1399
|
|
|
1377
1400
|
@action_logger
|
|
1378
|
-
async def
|
|
1401
|
+
async def browser_get_page_snapshot(self) -> str:
|
|
1379
1402
|
r"""Gets a textual snapshot of the page's interactive elements.
|
|
1380
1403
|
|
|
1381
|
-
The snapshot lists elements like buttons, links, and inputs,
|
|
1404
|
+
The snapshot lists elements like buttons, links, and inputs,
|
|
1405
|
+
each with
|
|
1382
1406
|
a unique `ref` ID. This ID is used by other tools (e.g., `click`,
|
|
1383
1407
|
`type`) to interact with a specific element. This tool provides no
|
|
1384
1408
|
visual information.
|
|
@@ -1407,18 +1431,33 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1407
1431
|
|
|
1408
1432
|
@dependencies_required('PIL')
|
|
1409
1433
|
@action_logger
|
|
1410
|
-
async def
|
|
1434
|
+
async def browser_get_som_screenshot(
|
|
1435
|
+
self,
|
|
1436
|
+
read_image: bool = True,
|
|
1437
|
+
instruction: Optional[str] = None,
|
|
1438
|
+
):
|
|
1411
1439
|
r"""Captures a screenshot with interactive elements highlighted.
|
|
1412
1440
|
|
|
1413
|
-
"SoM" stands for "Set of Marks". This tool takes a screenshot and
|
|
1441
|
+
"SoM" stands for "Set of Marks". This tool takes a screenshot and
|
|
1442
|
+
draws
|
|
1414
1443
|
boxes around clickable elements, overlaying a `ref` ID on each. Use
|
|
1415
1444
|
this for a visual understanding of the page, especially when the
|
|
1416
1445
|
textual snapshot is not enough.
|
|
1417
1446
|
|
|
1447
|
+
Args:
|
|
1448
|
+
read_image (bool, optional): If `True`, the agent will analyze
|
|
1449
|
+
the screenshot. Requires agent to be registered.
|
|
1450
|
+
(default: :obj:`True`)
|
|
1451
|
+
instruction (Optional[str], optional): A specific question or
|
|
1452
|
+
command for the agent regarding the screenshot, used only if
|
|
1453
|
+
`read_image` is `True`. For example: "Find the login button."
|
|
1454
|
+
|
|
1418
1455
|
Returns:
|
|
1419
1456
|
str: A summary message including the file path of the saved
|
|
1420
1457
|
screenshot, e.g., "Visual webpage screenshot captured with 42
|
|
1421
|
-
interactive elements and saved to /path/to/screenshot.png"
|
|
1458
|
+
interactive elements and saved to /path/to/screenshot.png",
|
|
1459
|
+
and optionally the agent's analysis if `read_image` is
|
|
1460
|
+
`True`.
|
|
1422
1461
|
"""
|
|
1423
1462
|
from PIL import Image
|
|
1424
1463
|
|
|
@@ -1465,12 +1504,44 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1465
1504
|
|
|
1466
1505
|
text_result = (
|
|
1467
1506
|
f"Visual webpage screenshot captured with {len(rects)} "
|
|
1468
|
-
f"interactive elements
|
|
1507
|
+
f"interactive elements."
|
|
1469
1508
|
)
|
|
1470
1509
|
|
|
1510
|
+
# Analyze image if requested and agent is registered
|
|
1511
|
+
if read_image and file_path:
|
|
1512
|
+
if self.agent is None:
|
|
1513
|
+
logger.error(
|
|
1514
|
+
"Cannot analyze screenshot: No agent registered. "
|
|
1515
|
+
"Please pass this toolkit to ChatAgent via "
|
|
1516
|
+
"toolkits_to_register_agent parameter."
|
|
1517
|
+
)
|
|
1518
|
+
text_result += (
|
|
1519
|
+
" Error: No agent registered for image analysis. "
|
|
1520
|
+
"Please pass this toolkit to ChatAgent via "
|
|
1521
|
+
"toolkits_to_register_agent parameter."
|
|
1522
|
+
)
|
|
1523
|
+
else:
|
|
1524
|
+
try:
|
|
1525
|
+
# Load the image and create a message
|
|
1526
|
+
from camel.messages import BaseMessage
|
|
1527
|
+
|
|
1528
|
+
img = Image.open(file_path)
|
|
1529
|
+
inst = instruction if instruction is not None else ""
|
|
1530
|
+
message = BaseMessage.make_user_message(
|
|
1531
|
+
role_name="User",
|
|
1532
|
+
content=inst,
|
|
1533
|
+
image_list=[img],
|
|
1534
|
+
)
|
|
1535
|
+
|
|
1536
|
+
# Get agent's analysis
|
|
1537
|
+
await self.agent.astep(message)
|
|
1538
|
+
except Exception as e:
|
|
1539
|
+
logger.error(f"Error analyzing screenshot: {e}")
|
|
1540
|
+
text_result += f". Error analyzing screenshot: {e}"
|
|
1541
|
+
|
|
1471
1542
|
return text_result
|
|
1472
1543
|
|
|
1473
|
-
async def
|
|
1544
|
+
async def browser_click(self, *, ref: str) -> Dict[str, Any]:
|
|
1474
1545
|
r"""Performs a click on an element on the page.
|
|
1475
1546
|
|
|
1476
1547
|
Args:
|
|
@@ -1514,7 +1585,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1514
1585
|
|
|
1515
1586
|
return result
|
|
1516
1587
|
|
|
1517
|
-
async def
|
|
1588
|
+
async def browser_type(self, *, ref: str, text: str) -> Dict[str, Any]:
|
|
1518
1589
|
r"""Types text into an input element on the page.
|
|
1519
1590
|
|
|
1520
1591
|
Args:
|
|
@@ -1542,7 +1613,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1542
1613
|
|
|
1543
1614
|
return result
|
|
1544
1615
|
|
|
1545
|
-
async def
|
|
1616
|
+
async def browser_select(self, *, ref: str, value: str) -> Dict[str, Any]:
|
|
1546
1617
|
r"""Selects an option in a dropdown (`<select>`) element.
|
|
1547
1618
|
|
|
1548
1619
|
Args:
|
|
@@ -1571,7 +1642,9 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1571
1642
|
|
|
1572
1643
|
return result
|
|
1573
1644
|
|
|
1574
|
-
async def
|
|
1645
|
+
async def browser_scroll(
|
|
1646
|
+
self, *, direction: str, amount: int
|
|
1647
|
+
) -> Dict[str, Any]:
|
|
1575
1648
|
r"""Scrolls the current page window.
|
|
1576
1649
|
|
|
1577
1650
|
Args:
|
|
@@ -1603,8 +1676,9 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1603
1676
|
|
|
1604
1677
|
return result
|
|
1605
1678
|
|
|
1606
|
-
async def
|
|
1607
|
-
r"""Simulates pressing the Enter key on the currently focused
|
|
1679
|
+
async def browser_enter(self) -> Dict[str, Any]:
|
|
1680
|
+
r"""Simulates pressing the Enter key on the currently focused
|
|
1681
|
+
element.
|
|
1608
1682
|
|
|
1609
1683
|
This is useful for submitting forms or search queries after using the
|
|
1610
1684
|
`type` tool.
|
|
@@ -1630,12 +1704,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1630
1704
|
return result
|
|
1631
1705
|
|
|
1632
1706
|
@action_logger
|
|
1633
|
-
async def
|
|
1707
|
+
async def browser_wait_user(
|
|
1634
1708
|
self, timeout_sec: Optional[float] = None
|
|
1635
1709
|
) -> Dict[str, Any]:
|
|
1636
1710
|
r"""Pauses execution and waits for human input from the console.
|
|
1637
1711
|
|
|
1638
|
-
Use this for tasks requiring manual steps, like solving a CAPTCHA.
|
|
1712
|
+
Use this for tasks requiring manual steps, like solving a CAPTCHA.
|
|
1713
|
+
The
|
|
1639
1714
|
agent will resume after the user presses Enter in the console.
|
|
1640
1715
|
|
|
1641
1716
|
Args:
|
|
@@ -1694,7 +1769,9 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1694
1769
|
return {"result": result_msg, "snapshot": snapshot, **tab_info}
|
|
1695
1770
|
|
|
1696
1771
|
@action_logger
|
|
1697
|
-
async def
|
|
1772
|
+
async def browser_get_page_links(
|
|
1773
|
+
self, *, ref: List[str]
|
|
1774
|
+
) -> Dict[str, Any]:
|
|
1698
1775
|
r"""Gets the destination URLs for a list of link elements.
|
|
1699
1776
|
|
|
1700
1777
|
This is useful to know where a link goes before clicking it.
|
|
@@ -1724,12 +1801,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1724
1801
|
return {"links": links}
|
|
1725
1802
|
|
|
1726
1803
|
@action_logger
|
|
1727
|
-
async def
|
|
1804
|
+
async def browser_solve_task(
|
|
1728
1805
|
self, task_prompt: str, start_url: str, max_steps: int = 15
|
|
1729
1806
|
) -> str:
|
|
1730
1807
|
r"""Delegates a complex, high-level task to a specialized web agent.
|
|
1731
1808
|
|
|
1732
|
-
Use this for multi-step tasks that can be described in a single
|
|
1809
|
+
Use this for multi-step tasks that can be described in a single
|
|
1810
|
+
prompt
|
|
1733
1811
|
(e.g., "log into my account and check for new messages"). The agent
|
|
1734
1812
|
will autonomously perform the necessary browser actions.
|
|
1735
1813
|
|
|
@@ -1794,52 +1872,6 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1794
1872
|
self.log_buffer.clear()
|
|
1795
1873
|
logger.info("Log buffer cleared")
|
|
1796
1874
|
|
|
1797
|
-
def get_tools(self) -> List[FunctionTool]:
|
|
1798
|
-
r"""Get available function tools
|
|
1799
|
-
based on enabled_tools configuration."""
|
|
1800
|
-
# Map tool names to their corresponding methods
|
|
1801
|
-
tool_map = {
|
|
1802
|
-
"open_browser": self.open_browser,
|
|
1803
|
-
"close_browser": self.close_browser,
|
|
1804
|
-
"visit_page": self.visit_page,
|
|
1805
|
-
"back": self.back,
|
|
1806
|
-
"forward": self.forward,
|
|
1807
|
-
"get_page_snapshot": self.get_page_snapshot,
|
|
1808
|
-
"get_som_screenshot": self.get_som_screenshot,
|
|
1809
|
-
"get_page_links": self.get_page_links,
|
|
1810
|
-
"click": self.click,
|
|
1811
|
-
"type": self.type,
|
|
1812
|
-
"select": self.select,
|
|
1813
|
-
"scroll": self.scroll,
|
|
1814
|
-
"enter": self.enter,
|
|
1815
|
-
"wait_user": self.wait_user,
|
|
1816
|
-
"solve_task": self.solve_task,
|
|
1817
|
-
"switch_tab": self.switch_tab,
|
|
1818
|
-
"close_tab": self.close_tab,
|
|
1819
|
-
"get_tab_info": self.get_tab_info,
|
|
1820
|
-
}
|
|
1821
|
-
|
|
1822
|
-
enabled_tools = []
|
|
1823
|
-
|
|
1824
|
-
for tool_name in self.enabled_tools:
|
|
1825
|
-
if tool_name == "solve_task" and self._web_agent_model is None:
|
|
1826
|
-
logger.warning(
|
|
1827
|
-
f"Tool '{tool_name}' is enabled but web_agent_model "
|
|
1828
|
-
f"is not provided. Skipping this tool."
|
|
1829
|
-
)
|
|
1830
|
-
continue
|
|
1831
|
-
|
|
1832
|
-
if tool_name in tool_map:
|
|
1833
|
-
tool = FunctionTool(
|
|
1834
|
-
cast(Callable[..., Any], tool_map[tool_name])
|
|
1835
|
-
)
|
|
1836
|
-
enabled_tools.append(tool)
|
|
1837
|
-
else:
|
|
1838
|
-
logger.warning(f"Unknown tool name: {tool_name}")
|
|
1839
|
-
|
|
1840
|
-
logger.info(f"Returning {len(enabled_tools)} enabled tools")
|
|
1841
|
-
return enabled_tools
|
|
1842
|
-
|
|
1843
1875
|
def clone_for_new_session(
|
|
1844
1876
|
self, new_session_id: Optional[str] = None
|
|
1845
1877
|
) -> "HybridBrowserToolkit":
|
|
@@ -1864,7 +1896,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1864
1896
|
user_data_dir=self._user_data_dir,
|
|
1865
1897
|
stealth=self._stealth,
|
|
1866
1898
|
web_agent_model=self._web_agent_model,
|
|
1867
|
-
cache_dir=f"{self._cache_dir.rstrip('/')}_clone_
|
|
1899
|
+
cache_dir=f"{self._cache_dir.rstrip('/')}_clone_"
|
|
1900
|
+
f"{new_session_id}/",
|
|
1868
1901
|
enabled_tools=self.enabled_tools.copy(),
|
|
1869
1902
|
browser_log_to_file=self._browser_log_to_file,
|
|
1870
1903
|
session_id=new_session_id,
|
|
@@ -1879,7 +1912,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1879
1912
|
)
|
|
1880
1913
|
|
|
1881
1914
|
@action_logger
|
|
1882
|
-
async def
|
|
1915
|
+
async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
1883
1916
|
r"""Switches to a different browser tab using its ID.
|
|
1884
1917
|
|
|
1885
1918
|
After switching, all actions will apply to the new tab. Use
|
|
@@ -1924,7 +1957,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1924
1957
|
return result
|
|
1925
1958
|
|
|
1926
1959
|
@action_logger
|
|
1927
|
-
async def
|
|
1960
|
+
async def browser_close_tab(self, *, tab_id: str) -> Dict[str, Any]:
|
|
1928
1961
|
r"""Closes a browser tab using its ID.
|
|
1929
1962
|
|
|
1930
1963
|
Use `get_tab_info` to find the ID of the tab to close. After
|
|
@@ -1936,7 +1969,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1936
1969
|
Returns:
|
|
1937
1970
|
Dict[str, Any]: A dictionary with the result of the action:
|
|
1938
1971
|
- "result" (str): Confirmation of the action.
|
|
1939
|
-
- "snapshot" (str): A snapshot of the active tab after
|
|
1972
|
+
- "snapshot" (str): A snapshot of the active tab after
|
|
1973
|
+
closure.
|
|
1940
1974
|
- "tabs" (List[Dict]): Information about remaining tabs.
|
|
1941
1975
|
- "current_tab" (int): Index of the new active tab.
|
|
1942
1976
|
- "total_tabs" (int): Total number of remaining tabs.
|
|
@@ -1974,7 +2008,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1974
2008
|
return result
|
|
1975
2009
|
|
|
1976
2010
|
@action_logger
|
|
1977
|
-
async def
|
|
2011
|
+
async def browser_get_tab_info(self) -> Dict[str, Any]:
|
|
1978
2012
|
r"""Gets a list of all open browser tabs and their information.
|
|
1979
2013
|
|
|
1980
2014
|
This includes each tab's index, title, and URL, and indicates which
|
|
@@ -1992,3 +2026,52 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1992
2026
|
"""
|
|
1993
2027
|
await self._ensure_browser()
|
|
1994
2028
|
return await self._get_tab_info_for_output()
|
|
2029
|
+
|
|
2030
|
+
def get_tools(self) -> List[FunctionTool]:
|
|
2031
|
+
r"""Get available function tools
|
|
2032
|
+
based on enabled_tools configuration."""
|
|
2033
|
+
# Map tool names to their corresponding methods
|
|
2034
|
+
tool_map = {
|
|
2035
|
+
"browser_open": self.browser_open,
|
|
2036
|
+
"browser_close": self.browser_close,
|
|
2037
|
+
"browser_visit_page": self.browser_visit_page,
|
|
2038
|
+
"browser_back": self.browser_back,
|
|
2039
|
+
"browser_forward": self.browser_forward,
|
|
2040
|
+
"browser_get_page_snapshot": self.browser_get_page_snapshot,
|
|
2041
|
+
"browser_get_som_screenshot": self.browser_get_som_screenshot,
|
|
2042
|
+
"browser_get_page_links": self.browser_get_page_links,
|
|
2043
|
+
"browser_click": self.browser_click,
|
|
2044
|
+
"browser_type": self.browser_type,
|
|
2045
|
+
"browser_select": self.browser_select,
|
|
2046
|
+
"browser_scroll": self.browser_scroll,
|
|
2047
|
+
"browser_enter": self.browser_enter,
|
|
2048
|
+
"browser_wait_user": self.browser_wait_user,
|
|
2049
|
+
"browser_solve_task": self.browser_solve_task,
|
|
2050
|
+
"browser_switch_tab": self.browser_switch_tab,
|
|
2051
|
+
"browser_close_tab": self.browser_close_tab,
|
|
2052
|
+
"browser_get_tab_info": self.browser_get_tab_info,
|
|
2053
|
+
}
|
|
2054
|
+
|
|
2055
|
+
enabled_tools = []
|
|
2056
|
+
|
|
2057
|
+
for tool_name in self.enabled_tools:
|
|
2058
|
+
if (
|
|
2059
|
+
tool_name == "browser_solve_task"
|
|
2060
|
+
and self._web_agent_model is None
|
|
2061
|
+
):
|
|
2062
|
+
logger.warning(
|
|
2063
|
+
f"Tool '{tool_name}' is enabled but web_agent_model "
|
|
2064
|
+
f"is not provided. Skipping this tool."
|
|
2065
|
+
)
|
|
2066
|
+
continue
|
|
2067
|
+
|
|
2068
|
+
if tool_name in tool_map:
|
|
2069
|
+
tool = FunctionTool(
|
|
2070
|
+
cast(Callable[..., Any], tool_map[tool_name])
|
|
2071
|
+
)
|
|
2072
|
+
enabled_tools.append(tool)
|
|
2073
|
+
else:
|
|
2074
|
+
logger.warning(f"Unknown tool name: {tool_name}")
|
|
2075
|
+
|
|
2076
|
+
logger.info(f"Returning {len(enabled_tools)} enabled tools")
|
|
2077
|
+
return enabled_tools
|