camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (32)
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/chat_agent.py +297 -16
  4. camel/interpreters/docker_interpreter.py +3 -2
  5. camel/loaders/base_loader.py +85 -0
  6. camel/messages/base.py +2 -6
  7. camel/services/agent_openapi_server.py +380 -0
  8. camel/societies/workforce/workforce.py +144 -33
  9. camel/toolkits/__init__.py +7 -4
  10. camel/toolkits/craw4ai_toolkit.py +2 -2
  11. camel/toolkits/file_write_toolkit.py +6 -6
  12. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
  13. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
  14. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
  15. camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1008 -0
  17. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
  18. camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +202 -23
  19. camel/toolkits/note_taking_toolkit.py +90 -0
  20. camel/toolkits/openai_image_toolkit.py +292 -0
  21. camel/toolkits/slack_toolkit.py +4 -4
  22. camel/toolkits/terminal_toolkit.py +223 -73
  23. camel/types/agents/tool_calling_record.py +4 -1
  24. camel/types/enums.py +24 -24
  25. camel/utils/mcp_client.py +37 -1
  26. camel/utils/tool_result.py +44 -0
  27. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/METADATA +58 -5
  28. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/RECORD +30 -26
  29. camel/toolkits/dalle_toolkit.py +0 -175
  30. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
  31. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/WHEEL +0 -0
  32. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py CHANGED
@@ -14,7 +14,7 @@
14
14
 
15
15
  from camel.logger import disable_logging, enable_logging, set_log_level
16
16
 
17
- __version__ = '0.2.71a2'
17
+ __version__ = '0.2.71a4'
18
18
 
19
19
  __all__ = [
20
20
  '__version__',
camel/agents/_types.py CHANGED
@@ -14,6 +14,7 @@
14
14
  from typing import Any, Dict, List, Optional, Union
15
15
 
16
16
  from openai import AsyncStream, Stream
17
+ from openai.types.chat import ChatCompletionChunk
17
18
  from pydantic import BaseModel, ConfigDict
18
19
 
19
20
  from camel.messages import BaseMessage
@@ -32,8 +33,11 @@ class ModelResponse(BaseModel):
32
33
  r"""The response from the model."""
33
34
 
34
35
  model_config = ConfigDict(arbitrary_types_allowed=True)
35
-
36
- response: Union[ChatCompletion, Stream, AsyncStream]
36
+ response: Union[
37
+ ChatCompletion,
38
+ Stream[ChatCompletionChunk],
39
+ AsyncStream[ChatCompletionChunk],
40
+ ]
37
41
  tool_call_requests: Optional[List[ToolCallRequest]]
38
42
  output_messages: List[BaseMessage]
39
43
  finish_reasons: List[str]
camel/agents/chat_agent.py CHANGED
@@ -85,6 +85,7 @@ from camel.utils import (
85
85
  model_from_json_schema,
86
86
  )
87
87
  from camel.utils.commons import dependencies_required
88
+ from camel.utils.tool_result import ToolResult
88
89
 
89
90
  if TYPE_CHECKING:
90
91
  from camel.terminators import ResponseTerminator
@@ -291,12 +292,17 @@ class ChatAgent(BaseAgent):
291
292
  self.stop_event = stop_event
292
293
  self.mask_tool_output = mask_tool_output
293
294
  self._secure_result_store: Dict[str, Any] = {}
295
+ self._pending_images: List[str] = []
296
+ self._image_retry_count: Dict[str, int] = {}
297
+ # Store images to attach to next user message
294
298
  self.pause_event = pause_event
295
299
 
296
300
  def reset(self):
297
301
  r"""Resets the :obj:`ChatAgent` to its initial state."""
298
302
  self.terminated = False
299
303
  self.init_messages()
304
+ self._pending_images = []
305
+ self._image_retry_count = {}
300
306
  for terminator in self.response_terminators:
301
307
  terminator.reset()
302
308
 
@@ -1140,6 +1146,16 @@ class ChatAgent(BaseAgent):
1140
1146
  role_name="User", content=input_message
1141
1147
  )
1142
1148
 
1149
+ # Attach any pending images from previous tool calls
1150
+ image_list = self._process_pending_images()
1151
+ if image_list:
1152
+ # Create new message with images attached
1153
+ input_message = BaseMessage.make_user_message(
1154
+ role_name="User",
1155
+ content=input_message.content,
1156
+ image_list=image_list,
1157
+ )
1158
+
1143
1159
  # Add user input to memory
1144
1160
  self.update_memory(input_message, OpenAIBackendRole.USER)
1145
1161
 
@@ -1297,6 +1313,16 @@ class ChatAgent(BaseAgent):
1297
1313
  role_name="User", content=input_message
1298
1314
  )
1299
1315
 
1316
+ # Attach any pending images from previous tool calls
1317
+ image_list = self._process_pending_images()
1318
+ if image_list:
1319
+ # Create new message with images attached
1320
+ input_message = BaseMessage.make_user_message(
1321
+ role_name="User",
1322
+ content=input_message.content,
1323
+ image_list=image_list,
1324
+ )
1325
+
1300
1326
  self.update_memory(input_message, OpenAIBackendRole.USER)
1301
1327
 
1302
1328
  tool_call_records: List[ToolCallingRecord] = []
@@ -1343,6 +1369,7 @@ class ChatAgent(BaseAgent):
1343
1369
 
1344
1370
  if tool_call_requests := response.tool_call_requests:
1345
1371
  # Process all tool calls
1372
+ new_images_from_tools = []
1346
1373
  for tool_call_request in tool_call_requests:
1347
1374
  if (
1348
1375
  tool_call_request.tool_name
@@ -1362,10 +1389,72 @@ class ChatAgent(BaseAgent):
1362
1389
  )
1363
1390
  tool_call_records.append(tool_call_record)
1364
1391
 
1392
+ # Check if this tool call produced images
1393
+ if (
1394
+ hasattr(tool_call_record, 'images')
1395
+ and tool_call_record.images
1396
+ ):
1397
+ new_images_from_tools.extend(
1398
+ tool_call_record.images
1399
+ )
1400
+
1365
1401
  # If we found an external tool call, break the loop
1366
1402
  if external_tool_call_requests:
1367
1403
  break
1368
1404
 
1405
+ # If tools produced images
1406
+ # send them to the model as a user message
1407
+ if new_images_from_tools:
1408
+ # Convert base64 images to PIL Images
1409
+ image_list = []
1410
+ for img_data in new_images_from_tools:
1411
+ try:
1412
+ import base64
1413
+ import io
1414
+
1415
+ from PIL import Image
1416
+
1417
+ # Extract base64 data from data URL format
1418
+ if img_data.startswith("data:image"):
1419
+ # Format:
1420
+ # "data:image/png;base64,iVBORw0KGgo..."
1421
+ base64_data = img_data.split(',', 1)[1]
1422
+ else:
1423
+ # Raw base64 data
1424
+ base64_data = img_data
1425
+
1426
+ # Decode and create PIL Image
1427
+ image_bytes = base64.b64decode(base64_data)
1428
+ pil_image = Image.open(io.BytesIO(image_bytes))
1429
+ # Convert to ensure proper
1430
+ # Image.Image type for compatibility
1431
+ pil_image_tool_result: Image.Image = (
1432
+ pil_image.convert('RGB')
1433
+ )
1434
+ image_list.append(pil_image_tool_result)
1435
+
1436
+ except Exception as e:
1437
+ logger.warning(
1438
+ f"Failed to convert "
1439
+ f"base64 image to PIL for immediate use: {e}"
1440
+ )
1441
+ continue
1442
+
1443
+ # If we have valid images
1444
+ # create a user message with images
1445
+ if image_list:
1446
+ # Create a user message with images
1447
+ # to provide visual context immediately
1448
+ image_message = BaseMessage.make_user_message(
1449
+ role_name="User",
1450
+ content="[Visual content from tool execution - please analyze and continue]", # noqa: E501
1451
+ image_list=image_list,
1452
+ )
1453
+
1454
+ self.update_memory(
1455
+ image_message, OpenAIBackendRole.USER
1456
+ )
1457
+
1369
1458
  if (
1370
1459
  self.max_iteration is not None
1371
1460
  and iteration_count >= self.max_iteration
@@ -1452,6 +1541,69 @@ class ChatAgent(BaseAgent):
1452
1541
  info=info,
1453
1542
  )
1454
1543
 
1544
+ def _process_pending_images(self) -> List:
1545
+ r"""Process pending images with retry logic and return PIL Image list.
1546
+
1547
+ Returns:
1548
+ List: List of successfully converted PIL Images.
1549
+ """
1550
+ if not self._pending_images:
1551
+ return []
1552
+
1553
+ image_list = []
1554
+ successfully_processed = []
1555
+ failed_images = []
1556
+
1557
+ for img_data in self._pending_images:
1558
+ # Track retry count
1559
+ retry_count = self._image_retry_count.get(img_data, 0)
1560
+
1561
+ # Remove images that have failed too many times (max 3 attempts)
1562
+ if retry_count >= 3:
1563
+ failed_images.append(img_data)
1564
+ logger.warning(
1565
+ f"Removing image after {retry_count} failed attempts"
1566
+ )
1567
+ continue
1568
+
1569
+ try:
1570
+ import base64
1571
+ import io
1572
+
1573
+ from PIL import Image
1574
+
1575
+ # Extract base64 data from data URL format
1576
+ if img_data.startswith("data:image"):
1577
+ # Format: "data:image/png;base64,iVBORw0KGgo..."
1578
+ base64_data = img_data.split(',', 1)[1]
1579
+ else:
1580
+ # Raw base64 data
1581
+ base64_data = img_data
1582
+
1583
+ # Decode and create PIL Image
1584
+ image_bytes = base64.b64decode(base64_data)
1585
+ pil_image = Image.open(io.BytesIO(image_bytes))
1586
+ pil_image_converted: Image.Image = pil_image.convert('RGB')
1587
+ image_list.append(pil_image_converted)
1588
+ successfully_processed.append(img_data)
1589
+
1590
+ except Exception as e:
1591
+ # Increment retry count for failed conversion
1592
+ self._image_retry_count[img_data] = retry_count + 1
1593
+ logger.warning(
1594
+ f"Failed to convert base64 image to PIL "
1595
+ f"(attempt {retry_count + 1}/3): {e}"
1596
+ )
1597
+ continue
1598
+
1599
+ # Clean up processed and failed images
1600
+ for img in successfully_processed + failed_images:
1601
+ self._pending_images.remove(img)
1602
+ # Clean up retry count for processed/removed images
1603
+ self._image_retry_count.pop(img, None)
1604
+
1605
+ return image_list
1606
+
1455
1607
  def _record_final_output(self, output_messages: List[BaseMessage]) -> None:
1456
1608
  r"""Log final messages or warnings about multiple responses."""
1457
1609
  if len(output_messages) == 1:
@@ -1462,6 +1614,61 @@ class ChatAgent(BaseAgent):
1462
1614
  "selected message manually using `record_message()`."
1463
1615
  )
1464
1616
 
1617
+ def _is_vision_error(self, exc: Exception) -> bool:
1618
+ r"""Check if the exception is likely related to vision/image is not
1619
+ supported by the model."""
1620
+ # TODO: more robust vision error detection
1621
+ error_msg = str(exc).lower()
1622
+ vision_keywords = [
1623
+ 'vision',
1624
+ 'image',
1625
+ 'multimodal',
1626
+ 'unsupported',
1627
+ 'invalid content type',
1628
+ 'image_url',
1629
+ 'visual',
1630
+ ]
1631
+ return any(keyword in error_msg for keyword in vision_keywords)
1632
+
1633
+ def _has_images(self, messages: List[OpenAIMessage]) -> bool:
1634
+ r"""Check if any message contains images."""
1635
+ for msg in messages:
1636
+ content = msg.get('content')
1637
+ if isinstance(content, list):
1638
+ for item in content:
1639
+ if (
1640
+ isinstance(item, dict)
1641
+ and item.get('type') == 'image_url'
1642
+ ):
1643
+ return True
1644
+ return False
1645
+
1646
+ def _strip_images_from_messages(
1647
+ self, messages: List[OpenAIMessage]
1648
+ ) -> List[OpenAIMessage]:
1649
+ r"""Remove images from messages, keeping only text content."""
1650
+ stripped_messages = []
1651
+ for msg in messages:
1652
+ content = msg.get('content')
1653
+ if isinstance(content, list):
1654
+ # Extract only text content from multimodal messages
1655
+ text_content = ""
1656
+ for item in content:
1657
+ if isinstance(item, dict) and item.get('type') == 'text':
1658
+ text_content += item.get('text', '')
1659
+
1660
+ # Create new message with only text content
1661
+ new_msg = msg.copy()
1662
+ new_msg['content'] = (
1663
+ text_content
1664
+ or "[Image content removed - model doesn't support vision]"
1665
+ )
1666
+ stripped_messages.append(new_msg)
1667
+ else:
1668
+ # Regular text message, keep as is
1669
+ stripped_messages.append(msg)
1670
+ return stripped_messages
1671
+
1465
1672
  def _get_model_response(
1466
1673
  self,
1467
1674
  openai_messages: List[OpenAIMessage],
@@ -1477,13 +1684,33 @@ class ChatAgent(BaseAgent):
1477
1684
  openai_messages, response_format, tool_schemas or None
1478
1685
  )
1479
1686
  except Exception as exc:
1480
- logger.error(
1481
- f"An error occurred while running model "
1482
- f"{self.model_backend.model_type}, "
1483
- f"index: {self.model_backend.current_model_index}",
1484
- exc_info=exc,
1485
- )
1486
- error_info = str(exc)
1687
+ # Try again without images if the error might be vision-related
1688
+ if self._is_vision_error(exc) and self._has_images(
1689
+ openai_messages
1690
+ ):
1691
+ logger.warning(
1692
+ "Model appears to not support vision. Retrying without images." # noqa: E501
1693
+ )
1694
+ try:
1695
+ stripped_messages = self._strip_images_from_messages(
1696
+ openai_messages
1697
+ )
1698
+ response = self.model_backend.run(
1699
+ stripped_messages,
1700
+ response_format,
1701
+ tool_schemas or None,
1702
+ )
1703
+ except Exception:
1704
+ pass # Fall through to original error handling
1705
+
1706
+ if not response:
1707
+ logger.error(
1708
+ f"An error occurred while running model "
1709
+ f"{self.model_backend.model_type}, "
1710
+ f"index: {self.model_backend.current_model_index}",
1711
+ exc_info=exc,
1712
+ )
1713
+ error_info = str(exc)
1487
1714
 
1488
1715
  if not response and self.model_backend.num_models > 1:
1489
1716
  raise ModelProcessingError(
@@ -1525,13 +1752,33 @@ class ChatAgent(BaseAgent):
1525
1752
  openai_messages, response_format, tool_schemas or None
1526
1753
  )
1527
1754
  except Exception as exc:
1528
- logger.error(
1529
- f"An error occurred while running model "
1530
- f"{self.model_backend.model_type}, "
1531
- f"index: {self.model_backend.current_model_index}",
1532
- exc_info=exc,
1533
- )
1534
- error_info = str(exc)
1755
+ # Try again without images if the error might be vision-related
1756
+ if self._is_vision_error(exc) and self._has_images(
1757
+ openai_messages
1758
+ ):
1759
+ logger.warning(
1760
+ "Model appears to not support vision. Retrying without images." # noqa: E501
1761
+ )
1762
+ try:
1763
+ stripped_messages = self._strip_images_from_messages(
1764
+ openai_messages
1765
+ )
1766
+ response = await self.model_backend.arun(
1767
+ stripped_messages,
1768
+ response_format,
1769
+ tool_schemas or None,
1770
+ )
1771
+ except Exception:
1772
+ pass # Fall through to original error handling
1773
+
1774
+ if not response:
1775
+ logger.error(
1776
+ f"An error occurred while running model "
1777
+ f"{self.model_backend.model_type}, "
1778
+ f"index: {self.model_backend.current_model_index}",
1779
+ exc_info=exc,
1780
+ )
1781
+ error_info = str(exc)
1535
1782
 
1536
1783
  if not response and self.model_backend.num_models > 1:
1537
1784
  raise ModelProcessingError(
@@ -2005,10 +2252,26 @@ class ChatAgent(BaseAgent):
2005
2252
  mask_flag = False
2006
2253
  logging.warning(error_msg)
2007
2254
 
2008
- return self._record_tool_calling(
2255
+ # Check if result is a ToolResult with images
2256
+ images_to_attach = None
2257
+ if isinstance(result, ToolResult):
2258
+ images_to_attach = result.images
2259
+ result = str(result) # Use string representation for storage
2260
+
2261
+ tool_record = self._record_tool_calling(
2009
2262
  func_name, args, result, tool_call_id, mask_output=mask_flag
2010
2263
  )
2011
2264
 
2265
+ # Store images for later attachment to next user message
2266
+ if images_to_attach:
2267
+ tool_record.images = images_to_attach
2268
+ # Add images with duplicate prevention
2269
+ for img in images_to_attach:
2270
+ if img not in self._pending_images:
2271
+ self._pending_images.append(img)
2272
+
2273
+ return tool_record
2274
+
2012
2275
  async def _aexecute_tool(
2013
2276
  self,
2014
2277
  tool_call_request: ToolCallRequest,
@@ -2049,7 +2312,25 @@ class ChatAgent(BaseAgent):
2049
2312
  result = {"error": error_msg}
2050
2313
  logging.warning(error_msg)
2051
2314
 
2052
- return self._record_tool_calling(func_name, args, result, tool_call_id)
2315
+ # Check if result is a ToolResult with images
2316
+ images_to_attach = None
2317
+ if isinstance(result, ToolResult):
2318
+ images_to_attach = result.images
2319
+ result = str(result) # Use string representation for storage
2320
+
2321
+ tool_record = self._record_tool_calling(
2322
+ func_name, args, result, tool_call_id
2323
+ )
2324
+
2325
+ # Store images for later attachment to next user message
2326
+ if images_to_attach:
2327
+ tool_record.images = images_to_attach
2328
+ # Add images with duplicate prevention
2329
+ for img in images_to_attach:
2330
+ if img not in self._pending_images:
2331
+ self._pending_images.append(img)
2332
+
2333
+ return tool_record
2053
2334
 
2054
2335
  def _record_tool_calling(
2055
2336
  self,
camel/interpreters/docker_interpreter.py CHANGED
@@ -146,8 +146,9 @@ class DockerInterpreter(BaseInterpreter):
146
146
  tar_stream = io.BytesIO()
147
147
  with tarfile.open(fileobj=tar_stream, mode='w') as tar:
148
148
  tarinfo = tarfile.TarInfo(name=filename)
149
- tarinfo.size = len(content)
150
- tar.addfile(tarinfo, io.BytesIO(content.encode('utf-8')))
149
+ encoded_content = content.encode('utf-8')
150
+ tarinfo.size = len(encoded_content)
151
+ tar.addfile(tarinfo, io.BytesIO(encoded_content))
151
152
  tar_stream.seek(0)
152
153
 
153
154
  # copy the tar into the container
camel/loaders/base_loader.py ADDED
@@ -0,0 +1,85 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from abc import ABC, abstractmethod
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Union
17
+
18
+
19
+ class BaseLoader(ABC):
20
+ r"""Abstract base class for all data loaders in CAMEL."""
21
+
22
+ @abstractmethod
23
+ def _load_single(self, source: Union[str, Path]) -> Dict[str, Any]:
24
+ r"""Load data from a single source.
25
+
26
+ Args:
27
+ source (Union[str, Path]): The data source to load from.
28
+
29
+ Returns:
30
+ Dict[str, Any]: A dictionary containing the loaded data. It is
31
+ recommended that the dictionary includes a "content" key with
32
+ the primary data and optional metadata keys.
33
+ """
34
+ pass
35
+
36
+ def load(
37
+ self,
38
+ source: Union[str, Path, List[Union[str, Path]]],
39
+ ) -> Dict[str, List[Dict[str, Any]]]:
40
+ r"""Load data from one or multiple sources.
41
+
42
+ Args:
43
+ source (Union[str, Path, List[Union[str, Path]]]): The data source
44
+ (s) to load from. Can be:
45
+ - A single path/URL (str or Path)
46
+ - A list of paths/URLs
47
+
48
+ Returns:
49
+ Dict[str, List[Dict[str, Any]]]: A dictionary with a single key
50
+ "contents" containing a list of loaded data. If a single source
51
+ is provided, the list will contain a single item.
52
+
53
+ Raises:
54
+ ValueError: If no sources are provided
55
+ Exception: If loading fails for any source
56
+ """
57
+ if not source:
58
+ raise ValueError("At least one source must be provided")
59
+
60
+ # Convert single source to list for uniform processing
61
+ sources = [source] if isinstance(source, (str, Path)) else list(source)
62
+
63
+ # Process all sources
64
+ results = []
65
+ for i, src in enumerate(sources, 1):
66
+ try:
67
+ content = self._load_single(src)
68
+ results.append(content)
69
+ except Exception as e:
70
+ raise RuntimeError(
71
+ f"Error loading source {i}/{len(sources)}: {src}"
72
+ ) from e
73
+
74
+ return {"contents": results}
75
+
76
+ @property
77
+ @abstractmethod
78
+ def supported_formats(self) -> set[str]:
79
+ r"""Get the set of supported file formats or data sources.
80
+
81
+ Returns:
82
+ set[str]: A set of strings representing the supported formats/
83
+ sources.
84
+ """
85
+ pass
camel/messages/base.py CHANGED
@@ -437,12 +437,8 @@ class BaseMessage:
437
437
  if self.image_list and len(self.image_list) > 0:
438
438
  for image in self.image_list:
439
439
  if image.format is None:
440
- raise ValueError(
441
- f"Image's `format` is `None`, please "
442
- f"transform the `PIL.Image.Image` to one of "
443
- f"following supported formats, such as "
444
- f"{list(OpenAIImageType)}"
445
- )
440
+ # Set default format to PNG as fallback
441
+ image.format = 'PNG'
446
442
 
447
443
  image_type: str = image.format.lower()
448
444
  if image_type not in OpenAIImageType: