camel-ai 0.2.71a10__py3-none-any.whl → 0.2.71a12__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +1 -1
- camel/models/cohere_model.py +4 -1
- camel/models/moonshot_model.py +54 -1
- camel/societies/workforce/prompts.py +32 -13
- camel/societies/workforce/role_playing_worker.py +1 -1
- camel/societies/workforce/worker.py +1 -1
- camel/societies/workforce/workforce.py +53 -18
- camel/tasks/task.py +9 -5
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +165 -218
- camel/toolkits/hybrid_browser_toolkit/unified_analyzer.js +3 -3
- camel/toolkits/search_toolkit.py +93 -60
- camel/toolkits/slack_toolkit.py +10 -0
- camel/types/enums.py +3 -0
- camel/utils/tool_result.py +1 -1
- {camel_ai-0.2.71a10.dist-info → camel_ai-0.2.71a12.dist-info}/METADATA +3 -3
- {camel_ai-0.2.71a10.dist-info → camel_ai-0.2.71a12.dist-info}/RECORD +20 -20
- {camel_ai-0.2.71a10.dist-info → camel_ai-0.2.71a12.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a10.dist-info → camel_ai-0.2.71a12.dist-info}/licenses/LICENSE +0 -0
|
@@ -1094,25 +1094,19 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1094
1094
|
# Public API Methods
|
|
1095
1095
|
|
|
1096
1096
|
async def open_browser(self) -> Dict[str, Any]:
|
|
1097
|
-
r"""
|
|
1098
|
-
|
|
1097
|
+
r"""Starts a new browser session. This must be the first browser
|
|
1098
|
+
action.
|
|
1099
1099
|
|
|
1100
|
-
This method initializes the
|
|
1101
|
-
|
|
1102
|
-
during toolkit initialization in the first tab. Agents cannot specify
|
|
1103
|
-
a custom URL - they must use the visit_page tool to open new tabs
|
|
1104
|
-
with other URLs.
|
|
1100
|
+
This method initializes the browser and navigates to a default start
|
|
1101
|
+
page. To visit a specific URL, use `visit_page` after this.
|
|
1105
1102
|
|
|
1106
1103
|
Returns:
|
|
1107
|
-
Dict[str, Any]: A dictionary
|
|
1108
|
-
- "result":
|
|
1109
|
-
|
|
1110
|
-
- "
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
- "tabs": List of all open tabs with their information.
|
|
1114
|
-
- "current_tab": Index of the currently active tab.
|
|
1115
|
-
- "total_tabs": Total number of open tabs.
|
|
1104
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1105
|
+
- "result" (str): Confirmation of the action.
|
|
1106
|
+
- "snapshot" (str): A textual snapshot of interactive elements.
|
|
1107
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1108
|
+
- "current_tab" (int): Index of the active tab.
|
|
1109
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1116
1110
|
"""
|
|
1117
1111
|
# Add logging if enabled
|
|
1118
1112
|
action_start = time.time()
|
|
@@ -1163,14 +1157,12 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1163
1157
|
|
|
1164
1158
|
@action_logger
|
|
1165
1159
|
async def close_browser(self) -> str:
|
|
1166
|
-
r"""Closes the
|
|
1167
|
-
resources.
|
|
1160
|
+
r"""Closes the browser session, releasing all resources.
|
|
1168
1161
|
|
|
1169
|
-
This should be called at the end of a
|
|
1170
|
-
clean shutdown of the browser instance.
|
|
1162
|
+
This should be called at the end of a task for cleanup.
|
|
1171
1163
|
|
|
1172
1164
|
Returns:
|
|
1173
|
-
str: A confirmation message
|
|
1165
|
+
str: A confirmation message.
|
|
1174
1166
|
"""
|
|
1175
1167
|
if self._agent is not None:
|
|
1176
1168
|
try:
|
|
@@ -1184,17 +1176,19 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1184
1176
|
|
|
1185
1177
|
@action_logger
|
|
1186
1178
|
async def visit_page(self, url: str) -> Dict[str, Any]:
|
|
1187
|
-
r"""
|
|
1188
|
-
|
|
1189
|
-
This method creates a new tab for the URL instead of navigating
|
|
1190
|
-
in the current tab, allowing better multi-tab management.
|
|
1179
|
+
r"""Opens a URL in a new browser tab and switches to it.
|
|
1191
1180
|
|
|
1192
1181
|
Args:
|
|
1193
|
-
url (str): The web address to load
|
|
1182
|
+
url (str): The web address to load. This should be a valid and
|
|
1183
|
+
existing URL.
|
|
1194
1184
|
|
|
1195
1185
|
Returns:
|
|
1196
|
-
Dict[str, Any]: A dictionary
|
|
1197
|
-
|
|
1186
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1187
|
+
- "result" (str): Confirmation of the action.
|
|
1188
|
+
- "snapshot" (str): A textual snapshot of the new page.
|
|
1189
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1190
|
+
- "current_tab" (int): Index of the new active tab.
|
|
1191
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1198
1192
|
"""
|
|
1199
1193
|
if not url or not isinstance(url, str):
|
|
1200
1194
|
return {
|
|
@@ -1260,23 +1254,18 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1260
1254
|
|
|
1261
1255
|
@action_logger
|
|
1262
1256
|
async def back(self) -> Dict[str, Any]:
|
|
1263
|
-
r"""
|
|
1257
|
+
r"""Goes back to the previous page in the browser history.
|
|
1264
1258
|
|
|
1265
|
-
This
|
|
1266
|
-
|
|
1267
|
-
history.
|
|
1259
|
+
This action simulates using the browser's "back" button in the
|
|
1260
|
+
currently active tab.
|
|
1268
1261
|
|
|
1269
1262
|
Returns:
|
|
1270
|
-
Dict[str, Any]: A dictionary
|
|
1271
|
-
- "result":
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
- "
|
|
1275
|
-
|
|
1276
|
-
string "snapshot not changed".
|
|
1277
|
-
- "tabs": List of all open tabs with their information.
|
|
1278
|
-
- "current_tab": Index of the currently active tab.
|
|
1279
|
-
- "total_tabs": Total number of open tabs.
|
|
1263
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1264
|
+
- "result" (str): Confirmation of the action.
|
|
1265
|
+
- "snapshot" (str): A textual snapshot of the previous page.
|
|
1266
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1267
|
+
- "current_tab" (int): Index of the active tab.
|
|
1268
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1280
1269
|
"""
|
|
1281
1270
|
page = await self._require_page()
|
|
1282
1271
|
|
|
@@ -1329,23 +1318,18 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1329
1318
|
|
|
1330
1319
|
@action_logger
|
|
1331
1320
|
async def forward(self) -> Dict[str, Any]:
|
|
1332
|
-
r"""
|
|
1321
|
+
r"""Goes forward to the next page in the browser history.
|
|
1333
1322
|
|
|
1334
|
-
This
|
|
1335
|
-
|
|
1336
|
-
if you have previously navigated back).
|
|
1323
|
+
This action simulates using the browser's "forward" button in the
|
|
1324
|
+
currently active tab.
|
|
1337
1325
|
|
|
1338
1326
|
Returns:
|
|
1339
|
-
Dict[str, Any]: A dictionary
|
|
1340
|
-
- "result":
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
- "
|
|
1344
|
-
|
|
1345
|
-
string "snapshot not changed".
|
|
1346
|
-
- "tabs": List of all open tabs with their information.
|
|
1347
|
-
- "current_tab": Index of the currently active tab.
|
|
1348
|
-
- "total_tabs": Total number of open tabs.
|
|
1327
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1328
|
+
- "result" (str): Confirmation of the action.
|
|
1329
|
+
- "snapshot" (str): A textual snapshot of the next page.
|
|
1330
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1331
|
+
- "current_tab" (int): Index of the active tab.
|
|
1332
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1349
1333
|
"""
|
|
1350
1334
|
page = await self._require_page()
|
|
1351
1335
|
|
|
@@ -1399,20 +1383,16 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1399
1383
|
|
|
1400
1384
|
@action_logger
|
|
1401
1385
|
async def get_page_snapshot(self) -> str:
|
|
1402
|
-
r"""
|
|
1386
|
+
r"""Gets a textual snapshot of the page's interactive elements.
|
|
1403
1387
|
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
The snapshot is useful for understanding the page structure and
|
|
1410
|
-
identifying elements to interact with without needing to parse raw
|
|
1411
|
-
HTML. A new snapshot is generated on each call.
|
|
1388
|
+
The snapshot lists elements like buttons, links, and inputs, each with
|
|
1389
|
+
a unique `ref` ID. This ID is used by other tools (e.g., `click`,
|
|
1390
|
+
`type`) to interact with a specific element. This tool provides no
|
|
1391
|
+
visual information.
|
|
1412
1392
|
|
|
1413
1393
|
Returns:
|
|
1414
|
-
str: A formatted string representing the interactive elements
|
|
1415
|
-
|
|
1394
|
+
str: A formatted string representing the interactive elements and
|
|
1395
|
+
their `ref` IDs. For example:
|
|
1416
1396
|
'- link "Sign In" [ref=1]'
|
|
1417
1397
|
'- textbox "Username" [ref=2]'
|
|
1418
1398
|
"""
|
|
@@ -1435,32 +1415,25 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1435
1415
|
@dependencies_required('PIL')
|
|
1436
1416
|
@action_logger
|
|
1437
1417
|
async def get_som_screenshot(self):
|
|
1438
|
-
r"""Captures a screenshot
|
|
1439
|
-
interactive elements. "SoM" stands for "Set of Marks".
|
|
1440
|
-
|
|
1441
|
-
This method is essential for tasks requiring visual understanding of
|
|
1442
|
-
the page layout. It works by:
|
|
1443
|
-
1. Taking a full-page screenshot.
|
|
1444
|
-
2. Identifying all interactive elements (buttons, links, inputs, etc.).
|
|
1445
|
-
3. Drawing colored boxes and reference IDs (`ref`) over these elements
|
|
1446
|
-
on the screenshot.
|
|
1447
|
-
4. Saving the annotated image to a cache directory.
|
|
1448
|
-
5. Returning the image as a base64-encoded string along with a summary.
|
|
1418
|
+
r"""Captures a screenshot with interactive elements highlighted.
|
|
1449
1419
|
|
|
1450
|
-
|
|
1451
|
-
|
|
1420
|
+
"SoM" stands for "Set of Marks". This tool takes a screenshot and draws
|
|
1421
|
+
boxes around clickable elements, overlaying a `ref` ID on each. Use
|
|
1422
|
+
this for a visual understanding of the page, especially when the
|
|
1423
|
+
textual snapshot is not enough.
|
|
1452
1424
|
|
|
1453
1425
|
Returns:
|
|
1454
1426
|
ToolResult: An object containing:
|
|
1455
|
-
- `text
|
|
1427
|
+
- `text` (str): A summary, e.g., "Visual webpage screenshot
|
|
1456
1428
|
captured with 42 interactive elements".
|
|
1457
|
-
- `images
|
|
1458
|
-
|
|
1429
|
+
- `images` (List[str]): A list containing one base64-encoded
|
|
1430
|
+
PNG image data URL.
|
|
1459
1431
|
"""
|
|
1460
1432
|
from PIL import Image
|
|
1461
1433
|
|
|
1462
1434
|
from camel.utils.tool_result import ToolResult
|
|
1463
1435
|
|
|
1436
|
+
os.makedirs(self._cache_dir, exist_ok=True)
|
|
1464
1437
|
# Get screenshot and analysis
|
|
1465
1438
|
page = await self._require_page()
|
|
1466
1439
|
|
|
@@ -1516,37 +1489,33 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1516
1489
|
return ToolResult(text=text_result, images=[img_data_url])
|
|
1517
1490
|
|
|
1518
1491
|
async def click(self, *, ref: str) -> Dict[str, Any]:
|
|
1519
|
-
r"""
|
|
1492
|
+
r"""Performs a click on an element on the page.
|
|
1520
1493
|
|
|
1521
1494
|
Args:
|
|
1522
|
-
ref (str): The
|
|
1523
|
-
obtained from
|
|
1495
|
+
ref (str): The `ref` ID of the element to click. This ID is
|
|
1496
|
+
obtained from a page snapshot (`get_page_snapshot` or
|
|
1524
1497
|
`get_som_screenshot`).
|
|
1525
1498
|
|
|
1526
1499
|
Returns:
|
|
1527
|
-
Dict[str, Any]: A dictionary
|
|
1528
|
-
- "result":
|
|
1529
|
-
- "snapshot": A
|
|
1530
|
-
click
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
- "
|
|
1534
|
-
- "current_tab": Index of the currently active tab.
|
|
1535
|
-
- "total_tabs": Total number of open tabs.
|
|
1500
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1501
|
+
- "result" (str): Confirmation of the action.
|
|
1502
|
+
- "snapshot" (str): A textual snapshot of the page after the
|
|
1503
|
+
click.
|
|
1504
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1505
|
+
- "current_tab" (int): Index of the active tab.
|
|
1506
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1536
1507
|
"""
|
|
1537
1508
|
self._validate_ref(ref, "click")
|
|
1538
1509
|
|
|
1539
1510
|
analysis = await self._get_unified_analysis()
|
|
1540
1511
|
elements = analysis.get("elements", {})
|
|
1541
1512
|
if ref not in elements:
|
|
1542
|
-
available_refs = list(elements.keys())
|
|
1543
1513
|
logger.error(f"Error: Element reference '{ref}' not found. ")
|
|
1544
1514
|
# Added snapshot to give more context on failure
|
|
1545
1515
|
snapshot = self._format_snapshot_from_analysis(analysis)
|
|
1546
1516
|
tab_info = await self._get_tab_info_for_output()
|
|
1547
1517
|
return {
|
|
1548
|
-
"result": f"Error: Element reference '{ref}' not found. "
|
|
1549
|
-
f"Available refs: {available_refs}",
|
|
1518
|
+
"result": f"Error: Element reference '{ref}' not found. ",
|
|
1550
1519
|
"snapshot": snapshot,
|
|
1551
1520
|
**tab_info,
|
|
1552
1521
|
}
|
|
@@ -1564,20 +1533,20 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1564
1533
|
return result
|
|
1565
1534
|
|
|
1566
1535
|
async def type(self, *, ref: str, text: str) -> Dict[str, Any]:
|
|
1567
|
-
r"""Types text into an input
|
|
1536
|
+
r"""Types text into an input element on the page.
|
|
1568
1537
|
|
|
1569
1538
|
Args:
|
|
1570
|
-
ref (str): The
|
|
1571
|
-
text (str): The text to
|
|
1539
|
+
ref (str): The `ref` ID of the input element, from a snapshot.
|
|
1540
|
+
text (str): The text to type into the element.
|
|
1572
1541
|
|
|
1573
1542
|
Returns:
|
|
1574
|
-
Dict[str, Any]: A dictionary
|
|
1575
|
-
- "result":
|
|
1576
|
-
- "snapshot": A
|
|
1577
|
-
|
|
1578
|
-
- "tabs"
|
|
1579
|
-
- "current_tab": Index of the
|
|
1580
|
-
- "total_tabs": Total number of open tabs.
|
|
1543
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1544
|
+
- "result" (str): Confirmation of the action.
|
|
1545
|
+
- "snapshot" (str): A textual snapshot of the page after
|
|
1546
|
+
typing.
|
|
1547
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1548
|
+
- "current_tab" (int): Index of the active tab.
|
|
1549
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1581
1550
|
"""
|
|
1582
1551
|
self._validate_ref(ref, "type")
|
|
1583
1552
|
await self._get_unified_analysis() # Ensure aria-ref attributes
|
|
@@ -1592,21 +1561,21 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1592
1561
|
return result
|
|
1593
1562
|
|
|
1594
1563
|
async def select(self, *, ref: str, value: str) -> Dict[str, Any]:
|
|
1595
|
-
r"""Selects an option
|
|
1564
|
+
r"""Selects an option in a dropdown (`<select>`) element.
|
|
1596
1565
|
|
|
1597
1566
|
Args:
|
|
1598
|
-
ref (str): The
|
|
1599
|
-
value (str): The value of the `<option>` to
|
|
1600
|
-
|
|
1601
|
-
visible text.
|
|
1567
|
+
ref (str): The `ref` ID of the `<select>` element.
|
|
1568
|
+
value (str): The `value` attribute of the `<option>` to select,
|
|
1569
|
+
not its visible text.
|
|
1602
1570
|
|
|
1603
1571
|
Returns:
|
|
1604
|
-
Dict[str, Any]: A dictionary
|
|
1605
|
-
- "result":
|
|
1606
|
-
- "snapshot": A
|
|
1607
|
-
|
|
1608
|
-
- "
|
|
1609
|
-
- "
|
|
1572
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1573
|
+
- "result" (str): Confirmation of the action.
|
|
1574
|
+
- "snapshot" (str): A snapshot of the page after the
|
|
1575
|
+
selection.
|
|
1576
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1577
|
+
- "current_tab" (int): Index of the active tab.
|
|
1578
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1610
1579
|
"""
|
|
1611
1580
|
self._validate_ref(ref, "select")
|
|
1612
1581
|
await self._get_unified_analysis()
|
|
@@ -1621,20 +1590,19 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1621
1590
|
return result
|
|
1622
1591
|
|
|
1623
1592
|
async def scroll(self, *, direction: str, amount: int) -> Dict[str, Any]:
|
|
1624
|
-
r"""Scrolls the page window
|
|
1593
|
+
r"""Scrolls the current page window.
|
|
1625
1594
|
|
|
1626
1595
|
Args:
|
|
1627
|
-
direction (str): The direction to scroll
|
|
1628
|
-
'down'.
|
|
1596
|
+
direction (str): The direction to scroll: 'up' or 'down'.
|
|
1629
1597
|
amount (int): The number of pixels to scroll.
|
|
1630
1598
|
|
|
1631
1599
|
Returns:
|
|
1632
|
-
Dict[str, Any]: A dictionary
|
|
1633
|
-
- "result":
|
|
1634
|
-
- "snapshot": A
|
|
1635
|
-
- "tabs"
|
|
1636
|
-
- "current_tab": Index of the
|
|
1637
|
-
- "total_tabs": Total number of open tabs.
|
|
1600
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1601
|
+
- "result" (str): Confirmation of the action.
|
|
1602
|
+
- "snapshot" (str): A snapshot of the page after scrolling.
|
|
1603
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1604
|
+
- "current_tab" (int): Index of the active tab.
|
|
1605
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1638
1606
|
"""
|
|
1639
1607
|
if direction not in ("up", "down"):
|
|
1640
1608
|
tab_info = await self._get_tab_info_for_output()
|
|
@@ -1656,25 +1624,17 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1656
1624
|
async def enter(self) -> Dict[str, Any]:
|
|
1657
1625
|
r"""Simulates pressing the Enter key on the currently focused element.
|
|
1658
1626
|
|
|
1659
|
-
This
|
|
1660
|
-
|
|
1661
|
-
an element, such as:
|
|
1662
|
-
- Submitting a search query after typing in a search box.
|
|
1663
|
-
- Confirming a form submission.
|
|
1664
|
-
- Executing a command in a text input field.
|
|
1665
|
-
|
|
1666
|
-
The common usage pattern is to first use the 'type' tool to input
|
|
1667
|
-
text, which sets the focus, and then call 'enter' without any
|
|
1668
|
-
parameters to trigger the action.
|
|
1627
|
+
This is useful for submitting forms or search queries after using the
|
|
1628
|
+
`type` tool.
|
|
1669
1629
|
|
|
1670
1630
|
Returns:
|
|
1671
|
-
Dict[str, Any]: A dictionary
|
|
1672
|
-
- "result":
|
|
1673
|
-
- "snapshot": A new page snapshot, as this action often
|
|
1674
|
-
triggers navigation
|
|
1675
|
-
- "tabs"
|
|
1676
|
-
- "current_tab": Index of the
|
|
1677
|
-
- "total_tabs": Total number of open tabs.
|
|
1631
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1632
|
+
- "result" (str): Confirmation of the action.
|
|
1633
|
+
- "snapshot" (str): A new page snapshot, as this action often
|
|
1634
|
+
triggers navigation.
|
|
1635
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1636
|
+
- "current_tab" (int): Index of the active tab.
|
|
1637
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1678
1638
|
"""
|
|
1679
1639
|
# Always press Enter on the currently focused element
|
|
1680
1640
|
action = {"type": "enter"}
|
|
@@ -1691,25 +1651,22 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1691
1651
|
async def wait_user(
|
|
1692
1652
|
self, timeout_sec: Optional[float] = None
|
|
1693
1653
|
) -> Dict[str, Any]:
|
|
1694
|
-
r"""Pauses
|
|
1654
|
+
r"""Pauses execution and waits for human input from the console.
|
|
1695
1655
|
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
until the user presses the Enter key.
|
|
1656
|
+
Use this for tasks requiring manual steps, like solving a CAPTCHA. The
|
|
1657
|
+
agent will resume after the user presses Enter in the console.
|
|
1699
1658
|
|
|
1700
1659
|
Args:
|
|
1701
|
-
timeout_sec (Optional[float]):
|
|
1702
|
-
|
|
1703
|
-
automatically. If `None`, it will wait indefinitely.
|
|
1660
|
+
timeout_sec (Optional[float]): Max time to wait in seconds. If
|
|
1661
|
+
`None`, it will wait indefinitely.
|
|
1704
1662
|
|
|
1705
1663
|
Returns:
|
|
1706
|
-
Dict[str, Any]: A dictionary
|
|
1707
|
-
- "result": A message indicating how the wait ended
|
|
1708
|
-
|
|
1709
|
-
- "
|
|
1710
|
-
- "
|
|
1711
|
-
- "
|
|
1712
|
-
- "total_tabs": Total number of open tabs.
|
|
1664
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1665
|
+
- "result" (str): A message indicating how the wait ended.
|
|
1666
|
+
- "snapshot" (str): The page snapshot after the wait.
|
|
1667
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1668
|
+
- "current_tab" (int): Index of the active tab.
|
|
1669
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1713
1670
|
"""
|
|
1714
1671
|
import asyncio
|
|
1715
1672
|
|
|
@@ -1756,20 +1713,18 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1756
1713
|
|
|
1757
1714
|
@action_logger
|
|
1758
1715
|
async def get_page_links(self, *, ref: List[str]) -> Dict[str, Any]:
|
|
1759
|
-
r"""
|
|
1716
|
+
r"""Gets the destination URLs for a list of link elements.
|
|
1760
1717
|
|
|
1761
|
-
This is useful
|
|
1762
|
-
clicking it.
|
|
1718
|
+
This is useful to know where a link goes before clicking it.
|
|
1763
1719
|
|
|
1764
1720
|
Args:
|
|
1765
|
-
ref (List[str]): A list of
|
|
1766
|
-
|
|
1721
|
+
ref (List[str]): A list of `ref` IDs for link elements, obtained
|
|
1722
|
+
from a page snapshot.
|
|
1767
1723
|
|
|
1768
1724
|
Returns:
|
|
1769
1725
|
Dict[str, Any]: A dictionary containing:
|
|
1770
|
-
- "links": A list of
|
|
1771
|
-
|
|
1772
|
-
keys.
|
|
1726
|
+
- "links" (List[Dict]): A list of found links, where each
|
|
1727
|
+
link has "text", "ref", and "url" keys.
|
|
1773
1728
|
"""
|
|
1774
1729
|
if not ref or not isinstance(ref, list):
|
|
1775
1730
|
return {"links": []}
|
|
@@ -1790,26 +1745,25 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1790
1745
|
async def solve_task(
|
|
1791
1746
|
self, task_prompt: str, start_url: str, max_steps: int = 15
|
|
1792
1747
|
) -> str:
|
|
1793
|
-
r"""
|
|
1748
|
+
r"""Delegates a complex, high-level task to a specialized web agent.
|
|
1794
1749
|
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1750
|
+
Use this for multi-step tasks that can be described in a single prompt
|
|
1751
|
+
(e.g., "log into my account and check for new messages"). The agent
|
|
1752
|
+
will autonomously perform the necessary browser actions.
|
|
1798
1753
|
|
|
1799
|
-
|
|
1800
|
-
|
|
1754
|
+
NOTE: This is a high-level action; for simple interactions, use tools
|
|
1755
|
+
like `click` and `type`. `web_agent_model` must be provided during
|
|
1756
|
+
toolkit initialization.
|
|
1801
1757
|
|
|
1802
1758
|
Args:
|
|
1803
|
-
task_prompt (str): A natural language description of the task
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1759
|
+
task_prompt (str): A natural language description of the task.
|
|
1760
|
+
start_url (str): The URL to start the task from. This should be a
|
|
1761
|
+
valid and existing URL, as agents may generate non-existent
|
|
1762
|
+
ones.
|
|
1763
|
+
max_steps (int): The maximum number of steps the agent can take.
|
|
1808
1764
|
|
|
1809
1765
|
Returns:
|
|
1810
|
-
str: A summary message indicating
|
|
1811
|
-
finished. The detailed trace of the agent's actions will be
|
|
1812
|
-
printed to the standard output.
|
|
1766
|
+
str: A summary message indicating the task has finished.
|
|
1813
1767
|
"""
|
|
1814
1768
|
agent = self._ensure_agent()
|
|
1815
1769
|
await agent.navigate(start_url)
|
|
@@ -1944,25 +1898,21 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1944
1898
|
|
|
1945
1899
|
@action_logger
|
|
1946
1900
|
async def switch_tab(self, *, tab_index: int) -> Dict[str, Any]:
|
|
1947
|
-
r"""Switches to a
|
|
1901
|
+
r"""Switches to a different browser tab using its index.
|
|
1948
1902
|
|
|
1949
|
-
|
|
1950
|
-
|
|
1951
|
-
selected tab.
|
|
1903
|
+
After switching, all actions will apply to the new tab. Use
|
|
1904
|
+
`get_tab_info` to find the index of the tab you want to switch to.
|
|
1952
1905
|
|
|
1953
1906
|
Args:
|
|
1954
|
-
tab_index (int): The zero-based index of the tab to
|
|
1955
|
-
Use `get_tab_info` to see available tabs and their indices.
|
|
1907
|
+
tab_index (int): The zero-based index of the tab to activate.
|
|
1956
1908
|
|
|
1957
1909
|
Returns:
|
|
1958
|
-
Dict[str, Any]: A dictionary
|
|
1959
|
-
- "result":
|
|
1960
|
-
|
|
1961
|
-
- "
|
|
1962
|
-
|
|
1963
|
-
- "
|
|
1964
|
-
- "current_tab": Index of the currently active tab.
|
|
1965
|
-
- "total_tabs": Total number of open tabs.
|
|
1910
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1911
|
+
- "result" (str): Confirmation of the action.
|
|
1912
|
+
- "snapshot" (str): A snapshot of the newly active tab.
|
|
1913
|
+
- "tabs" (List[Dict]): Information about all open tabs.
|
|
1914
|
+
- "current_tab" (int): Index of the new active tab.
|
|
1915
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
1966
1916
|
"""
|
|
1967
1917
|
await self._ensure_browser()
|
|
1968
1918
|
session = await self._get_session()
|
|
@@ -1993,24 +1943,21 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
1993
1943
|
|
|
1994
1944
|
@action_logger
|
|
1995
1945
|
async def close_tab(self, *, tab_index: int) -> Dict[str, Any]:
|
|
1996
|
-
r"""Closes a
|
|
1946
|
+
r"""Closes a browser tab using its index.
|
|
1997
1947
|
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
session will remain active but without any pages.
|
|
1948
|
+
Use `get_tab_info` to find the index of the tab to close. After
|
|
1949
|
+
closing, the browser will switch to another tab if available.
|
|
2001
1950
|
|
|
2002
1951
|
Args:
|
|
2003
1952
|
tab_index (int): The zero-based index of the tab to close.
|
|
2004
1953
|
|
|
2005
1954
|
Returns:
|
|
2006
|
-
Dict[str, Any]: A dictionary
|
|
2007
|
-
- "result":
|
|
2008
|
-
|
|
2009
|
-
- "
|
|
2010
|
-
|
|
2011
|
-
- "
|
|
2012
|
-
- "current_tab": Index of the currently active tab.
|
|
2013
|
-
- "total_tabs": Total number of remaining open tabs.
|
|
1955
|
+
Dict[str, Any]: A dictionary with the result of the action:
|
|
1956
|
+
- "result" (str): Confirmation of the action.
|
|
1957
|
+
- "snapshot" (str): A snapshot of the active tab after closure.
|
|
1958
|
+
- "tabs" (List[Dict]): Information about remaining tabs.
|
|
1959
|
+
- "current_tab" (int): Index of the new active tab.
|
|
1960
|
+
- "total_tabs" (int): Total number of remaining tabs.
|
|
2014
1961
|
"""
|
|
2015
1962
|
await self._ensure_browser()
|
|
2016
1963
|
session = await self._get_session()
|
|
@@ -2046,20 +1993,20 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
2046
1993
|
|
|
2047
1994
|
@action_logger
|
|
2048
1995
|
async def get_tab_info(self) -> Dict[str, Any]:
|
|
2049
|
-
r"""
|
|
1996
|
+
r"""Gets a list of all open browser tabs and their information.
|
|
2050
1997
|
|
|
2051
|
-
This
|
|
2052
|
-
|
|
1998
|
+
This includes each tab's index, title, and URL, and indicates which
|
|
1999
|
+
tab is currently active. Use this to manage multiple tabs.
|
|
2053
2000
|
|
|
2054
2001
|
Returns:
|
|
2055
|
-
Dict[str, Any]: A dictionary
|
|
2056
|
-
- "tabs": A list of
|
|
2057
|
-
- "index": The zero-based index
|
|
2058
|
-
- "title": The page title
|
|
2059
|
-
- "url": The current URL
|
|
2060
|
-
- "is_current":
|
|
2061
|
-
- "current_tab": Index of the
|
|
2062
|
-
- "total_tabs": Total number of open tabs
|
|
2002
|
+
Dict[str, Any]: A dictionary with tab information:
|
|
2003
|
+
- "tabs" (List[Dict]): A list of open tabs, each with:
|
|
2004
|
+
- "index" (int): The tab's zero-based index.
|
|
2005
|
+
- "title" (str): The page title.
|
|
2006
|
+
- "url" (str): The current URL.
|
|
2007
|
+
- "is_current" (bool): True if the tab is active.
|
|
2008
|
+
- "current_tab" (int): Index of the active tab.
|
|
2009
|
+
- "total_tabs" (int): Total number of open tabs.
|
|
2063
2010
|
"""
|
|
2064
2011
|
await self._ensure_browser()
|
|
2065
2012
|
return await self._get_tab_info_for_output()
|
|
@@ -735,11 +735,11 @@
|
|
|
735
735
|
function renderTree(node, indent = '') {
|
|
736
736
|
const lines = [];
|
|
737
737
|
let meaningfulProps = '';
|
|
738
|
-
if (node.disabled) meaningfulProps += ' disabled';
|
|
739
|
-
if (node.occluded) meaningfulProps += ' occluded';
|
|
738
|
+
if (node.disabled) meaningfulProps += ' [disabled]';
|
|
739
|
+
if (node.occluded) meaningfulProps += ' [occluded]';
|
|
740
740
|
if (node.checked !== undefined) meaningfulProps += ` checked=${node.checked}`;
|
|
741
741
|
if (node.expanded !== undefined) meaningfulProps += ` expanded=${node.expanded}`;
|
|
742
|
-
if (node.selected) meaningfulProps += ' selected';
|
|
742
|
+
if (node.selected) meaningfulProps += ' [selected]';
|
|
743
743
|
|
|
744
744
|
// Add level attribute following Playwright's format
|
|
745
745
|
if (node.level !== undefined) meaningfulProps += ` [level=${node.level}]`;
|