camel-ai: camel_ai-0.2.71a1-py3-none-any.whl → camel_ai-0.2.71a3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/chat_agent.py +357 -18
- camel/messages/base.py +2 -6
- camel/messages/func_message.py +32 -5
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/workforce/single_agent_worker.py +1 -5
- camel/societies/workforce/workforce.py +68 -8
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +2 -2
- camel/toolkits/craw4ai_toolkit.py +27 -7
- camel/toolkits/file_write_toolkit.py +110 -31
- camel/toolkits/human_toolkit.py +19 -14
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
- camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
- camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/terminal_toolkit.py +189 -48
- camel/toolkits/video_download_toolkit.py +1 -2
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +24 -24
- camel/utils/message_summarizer.py +148 -0
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
- camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/_types.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
from typing import Any, Dict, List, Optional, Union
|
|
15
15
|
|
|
16
16
|
from openai import AsyncStream, Stream
|
|
17
|
+
from openai.types.chat import ChatCompletionChunk
|
|
17
18
|
from pydantic import BaseModel, ConfigDict
|
|
18
19
|
|
|
19
20
|
from camel.messages import BaseMessage
|
|
@@ -32,8 +33,11 @@ class ModelResponse(BaseModel):
|
|
|
32
33
|
r"""The response from the model."""
|
|
33
34
|
|
|
34
35
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
response: Union[
|
|
37
|
+
ChatCompletion,
|
|
38
|
+
Stream[ChatCompletionChunk],
|
|
39
|
+
AsyncStream[ChatCompletionChunk],
|
|
40
|
+
]
|
|
37
41
|
tool_call_requests: Optional[List[ToolCallRequest]]
|
|
38
42
|
output_messages: List[BaseMessage]
|
|
39
43
|
finish_reasons: List[str]
|
camel/agents/chat_agent.py
CHANGED
|
@@ -13,10 +13,12 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
+
import asyncio
|
|
16
17
|
import json
|
|
17
18
|
import logging
|
|
18
19
|
import textwrap
|
|
19
20
|
import threading
|
|
21
|
+
import time
|
|
20
22
|
import uuid
|
|
21
23
|
from collections import defaultdict
|
|
22
24
|
from pathlib import Path
|
|
@@ -83,6 +85,7 @@ from camel.utils import (
|
|
|
83
85
|
model_from_json_schema,
|
|
84
86
|
)
|
|
85
87
|
from camel.utils.commons import dependencies_required
|
|
88
|
+
from camel.utils.tool_result import ToolResult
|
|
86
89
|
|
|
87
90
|
if TYPE_CHECKING:
|
|
88
91
|
from camel.terminators import ResponseTerminator
|
|
@@ -173,6 +176,11 @@ class ChatAgent(BaseAgent):
|
|
|
173
176
|
stop_event (Optional[threading.Event], optional): Event to signal
|
|
174
177
|
termination of the agent's operation. When set, the agent will
|
|
175
178
|
terminate its execution. (default: :obj:`None`)
|
|
179
|
+
mask_tool_output (Optional[bool]): Whether to return a sanitized
|
|
180
|
+
placeholder instead of the raw tool output. (default: :obj:`False`)
|
|
181
|
+
pause_event (Optional[asyncio.Event]): Event to signal pause of the
|
|
182
|
+
agent's operation. When clear, the agent will pause its execution.
|
|
183
|
+
(default: :obj:`None`)
|
|
176
184
|
"""
|
|
177
185
|
|
|
178
186
|
def __init__(
|
|
@@ -206,6 +214,8 @@ class ChatAgent(BaseAgent):
|
|
|
206
214
|
max_iteration: Optional[int] = None,
|
|
207
215
|
agent_id: Optional[str] = None,
|
|
208
216
|
stop_event: Optional[threading.Event] = None,
|
|
217
|
+
mask_tool_output: bool = False,
|
|
218
|
+
pause_event: Optional[asyncio.Event] = None,
|
|
209
219
|
) -> None:
|
|
210
220
|
if isinstance(model, ModelManager):
|
|
211
221
|
self.model_backend = model
|
|
@@ -280,11 +290,19 @@ class ChatAgent(BaseAgent):
|
|
|
280
290
|
self.response_terminators = response_terminators or []
|
|
281
291
|
self.max_iteration = max_iteration
|
|
282
292
|
self.stop_event = stop_event
|
|
293
|
+
self.mask_tool_output = mask_tool_output
|
|
294
|
+
self._secure_result_store: Dict[str, Any] = {}
|
|
295
|
+
self._pending_images: List[str] = []
|
|
296
|
+
self._image_retry_count: Dict[str, int] = {}
|
|
297
|
+
# Store images to attach to next user message
|
|
298
|
+
self.pause_event = pause_event
|
|
283
299
|
|
|
284
300
|
def reset(self):
|
|
285
301
|
r"""Resets the :obj:`ChatAgent` to its initial state."""
|
|
286
302
|
self.terminated = False
|
|
287
303
|
self.init_messages()
|
|
304
|
+
self._pending_images = []
|
|
305
|
+
self._image_retry_count = {}
|
|
288
306
|
for terminator in self.response_terminators:
|
|
289
307
|
terminator.reset()
|
|
290
308
|
|
|
@@ -1128,6 +1146,16 @@ class ChatAgent(BaseAgent):
|
|
|
1128
1146
|
role_name="User", content=input_message
|
|
1129
1147
|
)
|
|
1130
1148
|
|
|
1149
|
+
# Attach any pending images from previous tool calls
|
|
1150
|
+
image_list = self._process_pending_images()
|
|
1151
|
+
if image_list:
|
|
1152
|
+
# Create new message with images attached
|
|
1153
|
+
input_message = BaseMessage.make_user_message(
|
|
1154
|
+
role_name="User",
|
|
1155
|
+
content=input_message.content,
|
|
1156
|
+
image_list=image_list,
|
|
1157
|
+
)
|
|
1158
|
+
|
|
1131
1159
|
# Add user input to memory
|
|
1132
1160
|
self.update_memory(input_message, OpenAIBackendRole.USER)
|
|
1133
1161
|
|
|
@@ -1143,6 +1171,10 @@ class ChatAgent(BaseAgent):
|
|
|
1143
1171
|
iteration_count = 0
|
|
1144
1172
|
|
|
1145
1173
|
while True:
|
|
1174
|
+
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1175
|
+
while not self.pause_event.is_set():
|
|
1176
|
+
time.sleep(0.001)
|
|
1177
|
+
|
|
1146
1178
|
try:
|
|
1147
1179
|
openai_messages, num_tokens = self.memory.get_context()
|
|
1148
1180
|
accumulated_context_tokens += num_tokens
|
|
@@ -1184,6 +1216,12 @@ class ChatAgent(BaseAgent):
|
|
|
1184
1216
|
external_tool_call_requests = []
|
|
1185
1217
|
external_tool_call_requests.append(tool_call_request)
|
|
1186
1218
|
else:
|
|
1219
|
+
if (
|
|
1220
|
+
self.pause_event is not None
|
|
1221
|
+
and not self.pause_event.is_set()
|
|
1222
|
+
):
|
|
1223
|
+
while not self.pause_event.is_set():
|
|
1224
|
+
time.sleep(0.001)
|
|
1187
1225
|
tool_call_records.append(
|
|
1188
1226
|
self._execute_tool(tool_call_request)
|
|
1189
1227
|
)
|
|
@@ -1275,6 +1313,16 @@ class ChatAgent(BaseAgent):
|
|
|
1275
1313
|
role_name="User", content=input_message
|
|
1276
1314
|
)
|
|
1277
1315
|
|
|
1316
|
+
# Attach any pending images from previous tool calls
|
|
1317
|
+
image_list = self._process_pending_images()
|
|
1318
|
+
if image_list:
|
|
1319
|
+
# Create new message with images attached
|
|
1320
|
+
input_message = BaseMessage.make_user_message(
|
|
1321
|
+
role_name="User",
|
|
1322
|
+
content=input_message.content,
|
|
1323
|
+
image_list=image_list,
|
|
1324
|
+
)
|
|
1325
|
+
|
|
1278
1326
|
self.update_memory(input_message, OpenAIBackendRole.USER)
|
|
1279
1327
|
|
|
1280
1328
|
tool_call_records: List[ToolCallingRecord] = []
|
|
@@ -1287,6 +1335,8 @@ class ChatAgent(BaseAgent):
|
|
|
1287
1335
|
step_token_usage = self._create_token_usage_tracker()
|
|
1288
1336
|
iteration_count = 0
|
|
1289
1337
|
while True:
|
|
1338
|
+
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1339
|
+
await self.pause_event.wait()
|
|
1290
1340
|
try:
|
|
1291
1341
|
openai_messages, num_tokens = self.memory.get_context()
|
|
1292
1342
|
accumulated_context_tokens += num_tokens
|
|
@@ -1319,6 +1369,7 @@ class ChatAgent(BaseAgent):
|
|
|
1319
1369
|
|
|
1320
1370
|
if tool_call_requests := response.tool_call_requests:
|
|
1321
1371
|
# Process all tool calls
|
|
1372
|
+
new_images_from_tools = []
|
|
1322
1373
|
for tool_call_request in tool_call_requests:
|
|
1323
1374
|
if (
|
|
1324
1375
|
tool_call_request.tool_name
|
|
@@ -1328,15 +1379,82 @@ class ChatAgent(BaseAgent):
|
|
|
1328
1379
|
external_tool_call_requests = []
|
|
1329
1380
|
external_tool_call_requests.append(tool_call_request)
|
|
1330
1381
|
else:
|
|
1382
|
+
if (
|
|
1383
|
+
self.pause_event is not None
|
|
1384
|
+
and not self.pause_event.is_set()
|
|
1385
|
+
):
|
|
1386
|
+
await self.pause_event.wait()
|
|
1331
1387
|
tool_call_record = await self._aexecute_tool(
|
|
1332
1388
|
tool_call_request
|
|
1333
1389
|
)
|
|
1334
1390
|
tool_call_records.append(tool_call_record)
|
|
1335
1391
|
|
|
1392
|
+
# Check if this tool call produced images
|
|
1393
|
+
if (
|
|
1394
|
+
hasattr(tool_call_record, 'images')
|
|
1395
|
+
and tool_call_record.images
|
|
1396
|
+
):
|
|
1397
|
+
new_images_from_tools.extend(
|
|
1398
|
+
tool_call_record.images
|
|
1399
|
+
)
|
|
1400
|
+
|
|
1336
1401
|
# If we found an external tool call, break the loop
|
|
1337
1402
|
if external_tool_call_requests:
|
|
1338
1403
|
break
|
|
1339
1404
|
|
|
1405
|
+
# If tools produced images
|
|
1406
|
+
# send them to the model as a user message
|
|
1407
|
+
if new_images_from_tools:
|
|
1408
|
+
# Convert base64 images to PIL Images
|
|
1409
|
+
image_list = []
|
|
1410
|
+
for img_data in new_images_from_tools:
|
|
1411
|
+
try:
|
|
1412
|
+
import base64
|
|
1413
|
+
import io
|
|
1414
|
+
|
|
1415
|
+
from PIL import Image
|
|
1416
|
+
|
|
1417
|
+
# Extract base64 data from data URL format
|
|
1418
|
+
if img_data.startswith("data:image"):
|
|
1419
|
+
# Format:
|
|
1420
|
+
# "..."
|
|
1421
|
+
base64_data = img_data.split(',', 1)[1]
|
|
1422
|
+
else:
|
|
1423
|
+
# Raw base64 data
|
|
1424
|
+
base64_data = img_data
|
|
1425
|
+
|
|
1426
|
+
# Decode and create PIL Image
|
|
1427
|
+
image_bytes = base64.b64decode(base64_data)
|
|
1428
|
+
pil_image = Image.open(io.BytesIO(image_bytes))
|
|
1429
|
+
# Convert to ensure proper
|
|
1430
|
+
# Image.Image type for compatibility
|
|
1431
|
+
pil_image_tool_result: Image.Image = (
|
|
1432
|
+
pil_image.convert('RGB')
|
|
1433
|
+
)
|
|
1434
|
+
image_list.append(pil_image_tool_result)
|
|
1435
|
+
|
|
1436
|
+
except Exception as e:
|
|
1437
|
+
logger.warning(
|
|
1438
|
+
f"Failed to convert "
|
|
1439
|
+
f"base64 image to PIL for immediate use: {e}"
|
|
1440
|
+
)
|
|
1441
|
+
continue
|
|
1442
|
+
|
|
1443
|
+
# If we have valid images
|
|
1444
|
+
# create a user message with images
|
|
1445
|
+
if image_list:
|
|
1446
|
+
# Create a user message with images
|
|
1447
|
+
# to provide visual context immediately
|
|
1448
|
+
image_message = BaseMessage.make_user_message(
|
|
1449
|
+
role_name="User",
|
|
1450
|
+
content="[Visual content from tool execution - please analyze and continue]", # noqa: E501
|
|
1451
|
+
image_list=image_list,
|
|
1452
|
+
)
|
|
1453
|
+
|
|
1454
|
+
self.update_memory(
|
|
1455
|
+
image_message, OpenAIBackendRole.USER
|
|
1456
|
+
)
|
|
1457
|
+
|
|
1340
1458
|
if (
|
|
1341
1459
|
self.max_iteration is not None
|
|
1342
1460
|
and iteration_count >= self.max_iteration
|
|
@@ -1423,6 +1541,69 @@ class ChatAgent(BaseAgent):
|
|
|
1423
1541
|
info=info,
|
|
1424
1542
|
)
|
|
1425
1543
|
|
|
1544
|
+
def _process_pending_images(self) -> List:
|
|
1545
|
+
r"""Process pending images with retry logic and return PIL Image list.
|
|
1546
|
+
|
|
1547
|
+
Returns:
|
|
1548
|
+
List: List of successfully converted PIL Images.
|
|
1549
|
+
"""
|
|
1550
|
+
if not self._pending_images:
|
|
1551
|
+
return []
|
|
1552
|
+
|
|
1553
|
+
image_list = []
|
|
1554
|
+
successfully_processed = []
|
|
1555
|
+
failed_images = []
|
|
1556
|
+
|
|
1557
|
+
for img_data in self._pending_images:
|
|
1558
|
+
# Track retry count
|
|
1559
|
+
retry_count = self._image_retry_count.get(img_data, 0)
|
|
1560
|
+
|
|
1561
|
+
# Remove images that have failed too many times (max 3 attempts)
|
|
1562
|
+
if retry_count >= 3:
|
|
1563
|
+
failed_images.append(img_data)
|
|
1564
|
+
logger.warning(
|
|
1565
|
+
f"Removing image after {retry_count} failed attempts"
|
|
1566
|
+
)
|
|
1567
|
+
continue
|
|
1568
|
+
|
|
1569
|
+
try:
|
|
1570
|
+
import base64
|
|
1571
|
+
import io
|
|
1572
|
+
|
|
1573
|
+
from PIL import Image
|
|
1574
|
+
|
|
1575
|
+
# Extract base64 data from data URL format
|
|
1576
|
+
if img_data.startswith("data:image"):
|
|
1577
|
+
# Format: "..."
|
|
1578
|
+
base64_data = img_data.split(',', 1)[1]
|
|
1579
|
+
else:
|
|
1580
|
+
# Raw base64 data
|
|
1581
|
+
base64_data = img_data
|
|
1582
|
+
|
|
1583
|
+
# Decode and create PIL Image
|
|
1584
|
+
image_bytes = base64.b64decode(base64_data)
|
|
1585
|
+
pil_image = Image.open(io.BytesIO(image_bytes))
|
|
1586
|
+
pil_image_converted: Image.Image = pil_image.convert('RGB')
|
|
1587
|
+
image_list.append(pil_image_converted)
|
|
1588
|
+
successfully_processed.append(img_data)
|
|
1589
|
+
|
|
1590
|
+
except Exception as e:
|
|
1591
|
+
# Increment retry count for failed conversion
|
|
1592
|
+
self._image_retry_count[img_data] = retry_count + 1
|
|
1593
|
+
logger.warning(
|
|
1594
|
+
f"Failed to convert base64 image to PIL "
|
|
1595
|
+
f"(attempt {retry_count + 1}/3): {e}"
|
|
1596
|
+
)
|
|
1597
|
+
continue
|
|
1598
|
+
|
|
1599
|
+
# Clean up processed and failed images
|
|
1600
|
+
for img in successfully_processed + failed_images:
|
|
1601
|
+
self._pending_images.remove(img)
|
|
1602
|
+
# Clean up retry count for processed/removed images
|
|
1603
|
+
self._image_retry_count.pop(img, None)
|
|
1604
|
+
|
|
1605
|
+
return image_list
|
|
1606
|
+
|
|
1426
1607
|
def _record_final_output(self, output_messages: List[BaseMessage]) -> None:
|
|
1427
1608
|
r"""Log final messages or warnings about multiple responses."""
|
|
1428
1609
|
if len(output_messages) == 1:
|
|
@@ -1433,6 +1614,61 @@ class ChatAgent(BaseAgent):
|
|
|
1433
1614
|
"selected message manually using `record_message()`."
|
|
1434
1615
|
)
|
|
1435
1616
|
|
|
1617
|
+
def _is_vision_error(self, exc: Exception) -> bool:
|
|
1618
|
+
r"""Check if the exception is likely related to vision/image is not
|
|
1619
|
+
supported by the model."""
|
|
1620
|
+
# TODO: more robust vision error detection
|
|
1621
|
+
error_msg = str(exc).lower()
|
|
1622
|
+
vision_keywords = [
|
|
1623
|
+
'vision',
|
|
1624
|
+
'image',
|
|
1625
|
+
'multimodal',
|
|
1626
|
+
'unsupported',
|
|
1627
|
+
'invalid content type',
|
|
1628
|
+
'image_url',
|
|
1629
|
+
'visual',
|
|
1630
|
+
]
|
|
1631
|
+
return any(keyword in error_msg for keyword in vision_keywords)
|
|
1632
|
+
|
|
1633
|
+
def _has_images(self, messages: List[OpenAIMessage]) -> bool:
|
|
1634
|
+
r"""Check if any message contains images."""
|
|
1635
|
+
for msg in messages:
|
|
1636
|
+
content = msg.get('content')
|
|
1637
|
+
if isinstance(content, list):
|
|
1638
|
+
for item in content:
|
|
1639
|
+
if (
|
|
1640
|
+
isinstance(item, dict)
|
|
1641
|
+
and item.get('type') == 'image_url'
|
|
1642
|
+
):
|
|
1643
|
+
return True
|
|
1644
|
+
return False
|
|
1645
|
+
|
|
1646
|
+
def _strip_images_from_messages(
|
|
1647
|
+
self, messages: List[OpenAIMessage]
|
|
1648
|
+
) -> List[OpenAIMessage]:
|
|
1649
|
+
r"""Remove images from messages, keeping only text content."""
|
|
1650
|
+
stripped_messages = []
|
|
1651
|
+
for msg in messages:
|
|
1652
|
+
content = msg.get('content')
|
|
1653
|
+
if isinstance(content, list):
|
|
1654
|
+
# Extract only text content from multimodal messages
|
|
1655
|
+
text_content = ""
|
|
1656
|
+
for item in content:
|
|
1657
|
+
if isinstance(item, dict) and item.get('type') == 'text':
|
|
1658
|
+
text_content += item.get('text', '')
|
|
1659
|
+
|
|
1660
|
+
# Create new message with only text content
|
|
1661
|
+
new_msg = msg.copy()
|
|
1662
|
+
new_msg['content'] = (
|
|
1663
|
+
text_content
|
|
1664
|
+
or "[Image content removed - model doesn't support vision]"
|
|
1665
|
+
)
|
|
1666
|
+
stripped_messages.append(new_msg)
|
|
1667
|
+
else:
|
|
1668
|
+
# Regular text message, keep as is
|
|
1669
|
+
stripped_messages.append(msg)
|
|
1670
|
+
return stripped_messages
|
|
1671
|
+
|
|
1436
1672
|
def _get_model_response(
|
|
1437
1673
|
self,
|
|
1438
1674
|
openai_messages: List[OpenAIMessage],
|
|
@@ -1448,13 +1684,33 @@ class ChatAgent(BaseAgent):
|
|
|
1448
1684
|
openai_messages, response_format, tool_schemas or None
|
|
1449
1685
|
)
|
|
1450
1686
|
except Exception as exc:
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1687
|
+
# Try again without images if the error might be vision-related
|
|
1688
|
+
if self._is_vision_error(exc) and self._has_images(
|
|
1689
|
+
openai_messages
|
|
1690
|
+
):
|
|
1691
|
+
logger.warning(
|
|
1692
|
+
"Model appears to not support vision. Retrying without images." # noqa: E501
|
|
1693
|
+
)
|
|
1694
|
+
try:
|
|
1695
|
+
stripped_messages = self._strip_images_from_messages(
|
|
1696
|
+
openai_messages
|
|
1697
|
+
)
|
|
1698
|
+
response = self.model_backend.run(
|
|
1699
|
+
stripped_messages,
|
|
1700
|
+
response_format,
|
|
1701
|
+
tool_schemas or None,
|
|
1702
|
+
)
|
|
1703
|
+
except Exception:
|
|
1704
|
+
pass # Fall through to original error handling
|
|
1705
|
+
|
|
1706
|
+
if not response:
|
|
1707
|
+
logger.error(
|
|
1708
|
+
f"An error occurred while running model "
|
|
1709
|
+
f"{self.model_backend.model_type}, "
|
|
1710
|
+
f"index: {self.model_backend.current_model_index}",
|
|
1711
|
+
exc_info=exc,
|
|
1712
|
+
)
|
|
1713
|
+
error_info = str(exc)
|
|
1458
1714
|
|
|
1459
1715
|
if not response and self.model_backend.num_models > 1:
|
|
1460
1716
|
raise ModelProcessingError(
|
|
@@ -1496,13 +1752,33 @@ class ChatAgent(BaseAgent):
|
|
|
1496
1752
|
openai_messages, response_format, tool_schemas or None
|
|
1497
1753
|
)
|
|
1498
1754
|
except Exception as exc:
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1755
|
+
# Try again without images if the error might be vision-related
|
|
1756
|
+
if self._is_vision_error(exc) and self._has_images(
|
|
1757
|
+
openai_messages
|
|
1758
|
+
):
|
|
1759
|
+
logger.warning(
|
|
1760
|
+
"Model appears to not support vision. Retrying without images." # noqa: E501
|
|
1761
|
+
)
|
|
1762
|
+
try:
|
|
1763
|
+
stripped_messages = self._strip_images_from_messages(
|
|
1764
|
+
openai_messages
|
|
1765
|
+
)
|
|
1766
|
+
response = await self.model_backend.arun(
|
|
1767
|
+
stripped_messages,
|
|
1768
|
+
response_format,
|
|
1769
|
+
tool_schemas or None,
|
|
1770
|
+
)
|
|
1771
|
+
except Exception:
|
|
1772
|
+
pass # Fall through to original error handling
|
|
1773
|
+
|
|
1774
|
+
if not response:
|
|
1775
|
+
logger.error(
|
|
1776
|
+
f"An error occurred while running model "
|
|
1777
|
+
f"{self.model_backend.model_type}, "
|
|
1778
|
+
f"index: {self.model_backend.current_model_index}",
|
|
1779
|
+
exc_info=exc,
|
|
1780
|
+
)
|
|
1781
|
+
error_info = str(exc)
|
|
1506
1782
|
|
|
1507
1783
|
if not response and self.model_backend.num_models > 1:
|
|
1508
1784
|
raise ModelProcessingError(
|
|
@@ -1958,14 +2234,43 @@ class ChatAgent(BaseAgent):
|
|
|
1958
2234
|
tool_call_id = tool_call_request.tool_call_id
|
|
1959
2235
|
tool = self._internal_tools[func_name]
|
|
1960
2236
|
try:
|
|
1961
|
-
|
|
2237
|
+
raw_result = tool(**args)
|
|
2238
|
+
if self.mask_tool_output:
|
|
2239
|
+
self._secure_result_store[tool_call_id] = raw_result
|
|
2240
|
+
result = (
|
|
2241
|
+
"[The tool has been executed successfully, but the output"
|
|
2242
|
+
" from the tool is masked. You can move forward]"
|
|
2243
|
+
)
|
|
2244
|
+
mask_flag = True
|
|
2245
|
+
else:
|
|
2246
|
+
result = raw_result
|
|
2247
|
+
mask_flag = False
|
|
1962
2248
|
except Exception as e:
|
|
1963
2249
|
# Capture the error message to prevent framework crash
|
|
1964
2250
|
error_msg = f"Error executing tool '{func_name}': {e!s}"
|
|
1965
|
-
result =
|
|
2251
|
+
result = f"Tool execution failed: {error_msg}"
|
|
2252
|
+
mask_flag = False
|
|
1966
2253
|
logging.warning(error_msg)
|
|
1967
2254
|
|
|
1968
|
-
|
|
2255
|
+
# Check if result is a ToolResult with images
|
|
2256
|
+
images_to_attach = None
|
|
2257
|
+
if isinstance(result, ToolResult):
|
|
2258
|
+
images_to_attach = result.images
|
|
2259
|
+
result = str(result) # Use string representation for storage
|
|
2260
|
+
|
|
2261
|
+
tool_record = self._record_tool_calling(
|
|
2262
|
+
func_name, args, result, tool_call_id, mask_output=mask_flag
|
|
2263
|
+
)
|
|
2264
|
+
|
|
2265
|
+
# Store images for later attachment to next user message
|
|
2266
|
+
if images_to_attach:
|
|
2267
|
+
tool_record.images = images_to_attach
|
|
2268
|
+
# Add images with duplicate prevention
|
|
2269
|
+
for img in images_to_attach:
|
|
2270
|
+
if img not in self._pending_images:
|
|
2271
|
+
self._pending_images.append(img)
|
|
2272
|
+
|
|
2273
|
+
return tool_record
|
|
1969
2274
|
|
|
1970
2275
|
async def _aexecute_tool(
|
|
1971
2276
|
self,
|
|
@@ -2007,7 +2312,25 @@ class ChatAgent(BaseAgent):
|
|
|
2007
2312
|
result = {"error": error_msg}
|
|
2008
2313
|
logging.warning(error_msg)
|
|
2009
2314
|
|
|
2010
|
-
|
|
2315
|
+
# Check if result is a ToolResult with images
|
|
2316
|
+
images_to_attach = None
|
|
2317
|
+
if isinstance(result, ToolResult):
|
|
2318
|
+
images_to_attach = result.images
|
|
2319
|
+
result = str(result) # Use string representation for storage
|
|
2320
|
+
|
|
2321
|
+
tool_record = self._record_tool_calling(
|
|
2322
|
+
func_name, args, result, tool_call_id
|
|
2323
|
+
)
|
|
2324
|
+
|
|
2325
|
+
# Store images for later attachment to next user message
|
|
2326
|
+
if images_to_attach:
|
|
2327
|
+
tool_record.images = images_to_attach
|
|
2328
|
+
# Add images with duplicate prevention
|
|
2329
|
+
for img in images_to_attach:
|
|
2330
|
+
if img not in self._pending_images:
|
|
2331
|
+
self._pending_images.append(img)
|
|
2332
|
+
|
|
2333
|
+
return tool_record
|
|
2011
2334
|
|
|
2012
2335
|
def _record_tool_calling(
|
|
2013
2336
|
self,
|
|
@@ -2015,9 +2338,23 @@ class ChatAgent(BaseAgent):
|
|
|
2015
2338
|
args: Dict[str, Any],
|
|
2016
2339
|
result: Any,
|
|
2017
2340
|
tool_call_id: str,
|
|
2341
|
+
mask_output: bool = False,
|
|
2018
2342
|
):
|
|
2019
2343
|
r"""Record the tool calling information in the memory, and return the
|
|
2020
2344
|
tool calling record.
|
|
2345
|
+
|
|
2346
|
+
Args:
|
|
2347
|
+
func_name (str): The name of the tool function called.
|
|
2348
|
+
args (Dict[str, Any]): The arguments passed to the tool.
|
|
2349
|
+
result (Any): The result returned by the tool execution.
|
|
2350
|
+
tool_call_id (str): A unique identifier for the tool call.
|
|
2351
|
+
mask_output (bool, optional): Whether to return a sanitized
|
|
2352
|
+
placeholder instead of the raw tool output.
|
|
2353
|
+
(default: :obj:`False`)
|
|
2354
|
+
|
|
2355
|
+
Returns:
|
|
2356
|
+
ToolCallingRecord: A struct containing information about
|
|
2357
|
+
this tool call.
|
|
2021
2358
|
"""
|
|
2022
2359
|
assist_msg = FunctionCallingMessage(
|
|
2023
2360
|
role_name=self.role_name,
|
|
@@ -2036,6 +2373,7 @@ class ChatAgent(BaseAgent):
|
|
|
2036
2373
|
func_name=func_name,
|
|
2037
2374
|
result=result,
|
|
2038
2375
|
tool_call_id=tool_call_id,
|
|
2376
|
+
mask_output=mask_output,
|
|
2039
2377
|
)
|
|
2040
2378
|
|
|
2041
2379
|
# Use precise timestamps to ensure correct ordering
|
|
@@ -2140,6 +2478,7 @@ class ChatAgent(BaseAgent):
|
|
|
2140
2478
|
),
|
|
2141
2479
|
max_iteration=self.max_iteration,
|
|
2142
2480
|
stop_event=self.stop_event,
|
|
2481
|
+
pause_event=self.pause_event,
|
|
2143
2482
|
)
|
|
2144
2483
|
|
|
2145
2484
|
# Copy memory if requested
|
camel/messages/base.py
CHANGED
|
@@ -437,12 +437,8 @@ class BaseMessage:
|
|
|
437
437
|
if self.image_list and len(self.image_list) > 0:
|
|
438
438
|
for image in self.image_list:
|
|
439
439
|
if image.format is None:
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
f"transform the `PIL.Image.Image` to one of "
|
|
443
|
-
f"following supported formats, such as "
|
|
444
|
-
f"{list(OpenAIImageType)}"
|
|
445
|
-
)
|
|
440
|
+
# Set default format to PNG as fallback
|
|
441
|
+
image.format = 'PNG'
|
|
446
442
|
|
|
447
443
|
image_type: str = image.format.lower()
|
|
448
444
|
if image_type not in OpenAIImageType:
|
camel/messages/func_message.py
CHANGED
|
@@ -47,12 +47,16 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
47
47
|
(default: :obj:`None`)
|
|
48
48
|
tool_call_id (Optional[str]): The ID of the tool call, if available.
|
|
49
49
|
(default: :obj:`None`)
|
|
50
|
+
mask_output (Optional[bool]): Whether to return a sanitized placeholder
|
|
51
|
+
instead of the raw tool output.
|
|
52
|
+
(default: :obj:`False`)
|
|
50
53
|
"""
|
|
51
54
|
|
|
52
55
|
func_name: Optional[str] = None
|
|
53
56
|
args: Optional[Dict] = None
|
|
54
57
|
result: Optional[Any] = None
|
|
55
58
|
tool_call_id: Optional[str] = None
|
|
59
|
+
mask_output: Optional[bool] = False
|
|
56
60
|
|
|
57
61
|
def to_openai_message(
|
|
58
62
|
self,
|
|
@@ -105,10 +109,13 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
105
109
|
# This is a function response
|
|
106
110
|
# TODO: Allow for more flexible setting of tool role,
|
|
107
111
|
# optionally to be the same as assistant messages
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
+
if self.mask_output:
|
|
113
|
+
content = "[MASKED]"
|
|
114
|
+
else:
|
|
115
|
+
content = function_format.format_tool_response(
|
|
116
|
+
self.func_name, # type: ignore[arg-type]
|
|
117
|
+
self.result, # type: ignore[arg-type]
|
|
118
|
+
)
|
|
112
119
|
return ShareGPTMessage(from_="tool", value=content) # type: ignore[call-arg]
|
|
113
120
|
|
|
114
121
|
def to_openai_assistant_message(self) -> OpenAIAssistantMessage:
|
|
@@ -154,10 +161,30 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
154
161
|
" due to missing function name."
|
|
155
162
|
)
|
|
156
163
|
|
|
157
|
-
|
|
164
|
+
if self.mask_output:
|
|
165
|
+
result_content = "[MASKED]"
|
|
166
|
+
else:
|
|
167
|
+
result_content = str(self.result)
|
|
158
168
|
|
|
159
169
|
return {
|
|
160
170
|
"role": "tool",
|
|
161
171
|
"content": result_content,
|
|
162
172
|
"tool_call_id": self.tool_call_id or "null",
|
|
163
173
|
}
|
|
174
|
+
|
|
175
|
+
def to_dict(self) -> Dict:
|
|
176
|
+
r"""Converts the message to a dictionary.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
dict: The converted dictionary.
|
|
180
|
+
"""
|
|
181
|
+
base = super().to_dict()
|
|
182
|
+
base["func_name"] = self.func_name
|
|
183
|
+
if self.args is not None:
|
|
184
|
+
base["args"] = self.args
|
|
185
|
+
if self.result is not None:
|
|
186
|
+
base["result"] = self.result
|
|
187
|
+
if self.tool_call_id is not None:
|
|
188
|
+
base["tool_call_id"] = self.tool_call_id
|
|
189
|
+
base["mask_output"] = self.mask_output
|
|
190
|
+
return base
|