PyPI - camel-ai - Versions diffs - 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl - Mend

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (32)

camel/__init__.py +1 -1
camel/agents/_types.py +6 -2
camel/agents/chat_agent.py +357 -18
camel/messages/base.py +2 -6
camel/messages/func_message.py +32 -5
camel/services/agent_openapi_server.py +380 -0
camel/societies/workforce/single_agent_worker.py +1 -5
camel/societies/workforce/workforce.py +68 -8
camel/tasks/task.py +2 -2
camel/toolkits/__init__.py +2 -2
camel/toolkits/craw4ai_toolkit.py +27 -7
camel/toolkits/file_write_toolkit.py +110 -31
camel/toolkits/human_toolkit.py +19 -14
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
camel/toolkits/jina_reranker_toolkit.py +3 -4
camel/toolkits/terminal_toolkit.py +189 -48
camel/toolkits/video_download_toolkit.py +1 -2
camel/types/agents/tool_calling_record.py +4 -1
camel/types/enums.py +24 -24
camel/utils/message_summarizer.py +148 -0
camel/utils/tool_result.py +44 -0
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0

camel/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@
 from camel.logger import disable_logging, enable_logging, set_log_level
-__version__ = '0.2.71a1'
+__version__ = '0.2.71a3'
 __all__ = [
     '__version__',

camel/agents/_types.py CHANGED Viewed

@@ -14,6 +14,7 @@
 from typing import Any, Dict, List, Optional, Union
 from openai import AsyncStream, Stream
+from openai.types.chat import ChatCompletionChunk
 from pydantic import BaseModel, ConfigDict
 from camel.messages import BaseMessage
@@ -32,8 +33,11 @@ class ModelResponse(BaseModel):
     r"""The response from the model."""
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    response: Union[ChatCompletion, Stream, AsyncStream]
+    response: Union[
+        ChatCompletion,
+        Stream[ChatCompletionChunk],
+        AsyncStream[ChatCompletionChunk],
+    ]
     tool_call_requests: Optional[List[ToolCallRequest]]
     output_messages: List[BaseMessage]
     finish_reasons: List[str]

camel/agents/chat_agent.py CHANGED Viewed

@@ -13,10 +13,12 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
+import asyncio
 import json
 import logging
 import textwrap
 import threading
+import time
 import uuid
 from collections import defaultdict
 from pathlib import Path
@@ -83,6 +85,7 @@ from camel.utils import (
     model_from_json_schema,
 )
 from camel.utils.commons import dependencies_required
+from camel.utils.tool_result import ToolResult
 if TYPE_CHECKING:
     from camel.terminators import ResponseTerminator
@@ -173,6 +176,11 @@ class ChatAgent(BaseAgent):
         stop_event (Optional[threading.Event], optional): Event to signal
             termination of the agent's operation. When set, the agent will
             terminate its execution. (default: :obj:`None`)
+        mask_tool_output (Optional[bool]): Whether to return a sanitized
+            placeholder instead of the raw tool output. (default: :obj:`False`)
+        pause_event (Optional[asyncio.Event]): Event to signal pause of the
+            agent's operation. When clear, the agent will pause its execution.
+            (default: :obj:`None`)
     """
     def __init__(
@@ -206,6 +214,8 @@ class ChatAgent(BaseAgent):
         max_iteration: Optional[int] = None,
         agent_id: Optional[str] = None,
         stop_event: Optional[threading.Event] = None,
+        mask_tool_output: bool = False,
+        pause_event: Optional[asyncio.Event] = None,
     ) -> None:
         if isinstance(model, ModelManager):
             self.model_backend = model
@@ -280,11 +290,19 @@ class ChatAgent(BaseAgent):
         self.response_terminators = response_terminators or []
         self.max_iteration = max_iteration
         self.stop_event = stop_event
+        self.mask_tool_output = mask_tool_output
+        self._secure_result_store: Dict[str, Any] = {}
+        self._pending_images: List[str] = []
+        self._image_retry_count: Dict[str, int] = {}
+        # Store images to attach to next user message
+        self.pause_event = pause_event
     def reset(self):
         r"""Resets the :obj:`ChatAgent` to its initial state."""
         self.terminated = False
         self.init_messages()
+        self._pending_images = []
+        self._image_retry_count = {}
         for terminator in self.response_terminators:
             terminator.reset()
@@ -1128,6 +1146,16 @@ class ChatAgent(BaseAgent):
                 role_name="User", content=input_message
             )
+        # Attach any pending images from previous tool calls
+        image_list = self._process_pending_images()
+        if image_list:
+            # Create new message with images attached
+            input_message = BaseMessage.make_user_message(
+                role_name="User",
+                content=input_message.content,
+                image_list=image_list,
+            )
         # Add user input to memory
         self.update_memory(input_message, OpenAIBackendRole.USER)
@@ -1143,6 +1171,10 @@ class ChatAgent(BaseAgent):
         iteration_count = 0
         while True:
+            if self.pause_event is not None and not self.pause_event.is_set():
+                while not self.pause_event.is_set():
+                    time.sleep(0.001)
             try:
                 openai_messages, num_tokens = self.memory.get_context()
                 accumulated_context_tokens += num_tokens
@@ -1184,6 +1216,12 @@ class ChatAgent(BaseAgent):
                             external_tool_call_requests = []
                         external_tool_call_requests.append(tool_call_request)
                     else:
+                        if (
+                            self.pause_event is not None
+                            and not self.pause_event.is_set()
+                        ):
+                            while not self.pause_event.is_set():
+                                time.sleep(0.001)
                         tool_call_records.append(
                             self._execute_tool(tool_call_request)
                         )
@@ -1275,6 +1313,16 @@ class ChatAgent(BaseAgent):
                 role_name="User", content=input_message
             )
+        # Attach any pending images from previous tool calls
+        image_list = self._process_pending_images()
+        if image_list:
+            # Create new message with images attached
+            input_message = BaseMessage.make_user_message(
+                role_name="User",
+                content=input_message.content,
+                image_list=image_list,
+            )
         self.update_memory(input_message, OpenAIBackendRole.USER)
         tool_call_records: List[ToolCallingRecord] = []
@@ -1287,6 +1335,8 @@ class ChatAgent(BaseAgent):
         step_token_usage = self._create_token_usage_tracker()
         iteration_count = 0
         while True:
+            if self.pause_event is not None and not self.pause_event.is_set():
+                await self.pause_event.wait()
             try:
                 openai_messages, num_tokens = self.memory.get_context()
                 accumulated_context_tokens += num_tokens
@@ -1319,6 +1369,7 @@ class ChatAgent(BaseAgent):
             if tool_call_requests := response.tool_call_requests:
                 # Process all tool calls
+                new_images_from_tools = []
                 for tool_call_request in tool_call_requests:
                     if (
                         tool_call_request.tool_name
@@ -1328,15 +1379,82 @@ class ChatAgent(BaseAgent):
                             external_tool_call_requests = []
                         external_tool_call_requests.append(tool_call_request)
                     else:
+                        if (
+                            self.pause_event is not None
+                            and not self.pause_event.is_set()
+                        ):
+                            await self.pause_event.wait()
                         tool_call_record = await self._aexecute_tool(
                             tool_call_request
                         )
                         tool_call_records.append(tool_call_record)
+                        # Check if this tool call produced images
+                        if (
+                            hasattr(tool_call_record, 'images')
+                            and tool_call_record.images
+                        ):
+                            new_images_from_tools.extend(
+                                tool_call_record.images
+                            )
                 # If we found an external tool call, break the loop
                 if external_tool_call_requests:
                     break
+                # If tools produced images
+                # send them to the model as a user message
+                if new_images_from_tools:
+                    # Convert base64 images to PIL Images
+                    image_list = []
+                    for img_data in new_images_from_tools:
+                        try:
+                            import base64
+                            import io
+                            from PIL import Image
+                            # Extract base64 data from data URL format
+                            if img_data.startswith("data:image"):
+                                # Format:
+                                # "data:image/png;base64,iVBORw0KGgo..."
+                                base64_data = img_data.split(',', 1)[1]
+                            else:
+                                # Raw base64 data
+                                base64_data = img_data
+                            # Decode and create PIL Image
+                            image_bytes = base64.b64decode(base64_data)
+                            pil_image = Image.open(io.BytesIO(image_bytes))
+                            # Convert to ensure proper
+                            # Image.Image type for compatibility
+                            pil_image_tool_result: Image.Image = (
+                                pil_image.convert('RGB')
+                            )
+                            image_list.append(pil_image_tool_result)
+                        except Exception as e:
+                            logger.warning(
+                                f"Failed to convert "
+                                f"base64 image to PIL for immediate use: {e}"
+                            )
+                            continue
+                    # If we have valid images
+                    # create a user message with images
+                    if image_list:
+                        # Create a user message with images
+                        # to provide visual context immediately
+                        image_message = BaseMessage.make_user_message(
+                            role_name="User",
+                            content="[Visual content from tool execution - please analyze and continue]",  # noqa: E501
+                            image_list=image_list,
+                        )
+                        self.update_memory(
+                            image_message, OpenAIBackendRole.USER
+                        )
                 if (
                     self.max_iteration is not None
                     and iteration_count >= self.max_iteration
@@ -1423,6 +1541,69 @@ class ChatAgent(BaseAgent):
             info=info,
         )
+    def _process_pending_images(self) -> List:
+        r"""Process pending images with retry logic and return PIL Image list.
+        Returns:
+            List: List of successfully converted PIL Images.
+        """
+        if not self._pending_images:
+            return []
+        image_list = []
+        successfully_processed = []
+        failed_images = []
+        for img_data in self._pending_images:
+            # Track retry count
+            retry_count = self._image_retry_count.get(img_data, 0)
+            # Remove images that have failed too many times (max 3 attempts)
+            if retry_count >= 3:
+                failed_images.append(img_data)
+                logger.warning(
+                    f"Removing image after {retry_count} failed attempts"
+                )
+                continue
+            try:
+                import base64
+                import io
+                from PIL import Image
+                # Extract base64 data from data URL format
+                if img_data.startswith("data:image"):
+                    # Format: "data:image/png;base64,iVBORw0KGgo..."
+                    base64_data = img_data.split(',', 1)[1]
+                else:
+                    # Raw base64 data
+                    base64_data = img_data
+                # Decode and create PIL Image
+                image_bytes = base64.b64decode(base64_data)
+                pil_image = Image.open(io.BytesIO(image_bytes))
+                pil_image_converted: Image.Image = pil_image.convert('RGB')
+                image_list.append(pil_image_converted)
+                successfully_processed.append(img_data)
+            except Exception as e:
+                # Increment retry count for failed conversion
+                self._image_retry_count[img_data] = retry_count + 1
+                logger.warning(
+                    f"Failed to convert base64 image to PIL "
+                    f"(attempt {retry_count + 1}/3): {e}"
+                )
+                continue
+        # Clean up processed and failed images
+        for img in successfully_processed + failed_images:
+            self._pending_images.remove(img)
+            # Clean up retry count for processed/removed images
+            self._image_retry_count.pop(img, None)
+        return image_list
     def _record_final_output(self, output_messages: List[BaseMessage]) -> None:
         r"""Log final messages or warnings about multiple responses."""
         if len(output_messages) == 1:
@@ -1433,6 +1614,61 @@ class ChatAgent(BaseAgent):
                 "selected message manually using `record_message()`."
             )
+    def _is_vision_error(self, exc: Exception) -> bool:
+        r"""Check if the exception is likely related to vision/image is not
+        supported by the model."""
+        # TODO: more robust vision error detection
+        error_msg = str(exc).lower()
+        vision_keywords = [
+            'vision',
+            'image',
+            'multimodal',
+            'unsupported',
+            'invalid content type',
+            'image_url',
+            'visual',
+        ]
+        return any(keyword in error_msg for keyword in vision_keywords)
+    def _has_images(self, messages: List[OpenAIMessage]) -> bool:
+        r"""Check if any message contains images."""
+        for msg in messages:
+            content = msg.get('content')
+            if isinstance(content, list):
+                for item in content:
+                    if (
+                        isinstance(item, dict)
+                        and item.get('type') == 'image_url'
+                    ):
+                        return True
+        return False
+    def _strip_images_from_messages(
+        self, messages: List[OpenAIMessage]
+    ) -> List[OpenAIMessage]:
+        r"""Remove images from messages, keeping only text content."""
+        stripped_messages = []
+        for msg in messages:
+            content = msg.get('content')
+            if isinstance(content, list):
+                # Extract only text content from multimodal messages
+                text_content = ""
+                for item in content:
+                    if isinstance(item, dict) and item.get('type') == 'text':
+                        text_content += item.get('text', '')
+                # Create new message with only text content
+                new_msg = msg.copy()
+                new_msg['content'] = (
+                    text_content
+                    or "[Image content removed - model doesn't support vision]"
+                )
+                stripped_messages.append(new_msg)
+            else:
+                # Regular text message, keep as is
+                stripped_messages.append(msg)
+        return stripped_messages
     def _get_model_response(
         self,
         openai_messages: List[OpenAIMessage],
@@ -1448,13 +1684,33 @@ class ChatAgent(BaseAgent):
                 openai_messages, response_format, tool_schemas or None
             )
         except Exception as exc:
-            logger.error(
-                f"An error occurred while running model "
-                f"{self.model_backend.model_type}, "
-                f"index: {self.model_backend.current_model_index}",
-                exc_info=exc,
-            )
-            error_info = str(exc)
+            # Try again without images if the error might be vision-related
+            if self._is_vision_error(exc) and self._has_images(
+                openai_messages
+            ):
+                logger.warning(
+                    "Model appears to not support vision. Retrying without images."  # noqa: E501
+                )
+                try:
+                    stripped_messages = self._strip_images_from_messages(
+                        openai_messages
+                    )
+                    response = self.model_backend.run(
+                        stripped_messages,
+                        response_format,
+                        tool_schemas or None,
+                    )
+                except Exception:
+                    pass  # Fall through to original error handling
+            if not response:
+                logger.error(
+                    f"An error occurred while running model "
+                    f"{self.model_backend.model_type}, "
+                    f"index: {self.model_backend.current_model_index}",
+                    exc_info=exc,
+                )
+                error_info = str(exc)
         if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
@@ -1496,13 +1752,33 @@ class ChatAgent(BaseAgent):
                 openai_messages, response_format, tool_schemas or None
             )
         except Exception as exc:
-            logger.error(
-                f"An error occurred while running model "
-                f"{self.model_backend.model_type}, "
-                f"index: {self.model_backend.current_model_index}",
-                exc_info=exc,
-            )
-            error_info = str(exc)
+            # Try again without images if the error might be vision-related
+            if self._is_vision_error(exc) and self._has_images(
+                openai_messages
+            ):
+                logger.warning(
+                    "Model appears to not support vision. Retrying without images."  # noqa: E501
+                )
+                try:
+                    stripped_messages = self._strip_images_from_messages(
+                        openai_messages
+                    )
+                    response = await self.model_backend.arun(
+                        stripped_messages,
+                        response_format,
+                        tool_schemas or None,
+                    )
+                except Exception:
+                    pass  # Fall through to original error handling
+            if not response:
+                logger.error(
+                    f"An error occurred while running model "
+                    f"{self.model_backend.model_type}, "
+                    f"index: {self.model_backend.current_model_index}",
+                    exc_info=exc,
+                )
+                error_info = str(exc)
         if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
@@ -1958,14 +2234,43 @@ class ChatAgent(BaseAgent):
         tool_call_id = tool_call_request.tool_call_id
         tool = self._internal_tools[func_name]
         try:
-            result = tool(**args)
+            raw_result = tool(**args)
+            if self.mask_tool_output:
+                self._secure_result_store[tool_call_id] = raw_result
+                result = (
+                    "[The tool has been executed successfully, but the output"
+                    " from the tool is masked. You can move forward]"
+                )
+                mask_flag = True
+            else:
+                result = raw_result
+                mask_flag = False
         except Exception as e:
             # Capture the error message to prevent framework crash
             error_msg = f"Error executing tool '{func_name}': {e!s}"
-            result = {"error": error_msg}
+            result = f"Tool execution failed: {error_msg}"
+            mask_flag = False
             logging.warning(error_msg)
-        return self._record_tool_calling(func_name, args, result, tool_call_id)
+        # Check if result is a ToolResult with images
+        images_to_attach = None
+        if isinstance(result, ToolResult):
+            images_to_attach = result.images
+            result = str(result)  # Use string representation for storage
+        tool_record = self._record_tool_calling(
+            func_name, args, result, tool_call_id, mask_output=mask_flag
+        )
+        # Store images for later attachment to next user message
+        if images_to_attach:
+            tool_record.images = images_to_attach
+            # Add images with duplicate prevention
+            for img in images_to_attach:
+                if img not in self._pending_images:
+                    self._pending_images.append(img)
+        return tool_record
     async def _aexecute_tool(
         self,
@@ -2007,7 +2312,25 @@ class ChatAgent(BaseAgent):
             result = {"error": error_msg}
             logging.warning(error_msg)
-        return self._record_tool_calling(func_name, args, result, tool_call_id)
+        # Check if result is a ToolResult with images
+        images_to_attach = None
+        if isinstance(result, ToolResult):
+            images_to_attach = result.images
+            result = str(result)  # Use string representation for storage
+        tool_record = self._record_tool_calling(
+            func_name, args, result, tool_call_id
+        )
+        # Store images for later attachment to next user message
+        if images_to_attach:
+            tool_record.images = images_to_attach
+            # Add images with duplicate prevention
+            for img in images_to_attach:
+                if img not in self._pending_images:
+                    self._pending_images.append(img)
+        return tool_record
     def _record_tool_calling(
         self,
@@ -2015,9 +2338,23 @@ class ChatAgent(BaseAgent):
         args: Dict[str, Any],
         result: Any,
         tool_call_id: str,
+        mask_output: bool = False,
     ):
         r"""Record the tool calling information in the memory, and return the
         tool calling record.
+        Args:
+            func_name (str): The name of the tool function called.
+            args (Dict[str, Any]): The arguments passed to the tool.
+            result (Any): The result returned by the tool execution.
+            tool_call_id (str): A unique identifier for the tool call.
+            mask_output (bool, optional): Whether to return a sanitized
+                placeholder instead of the raw tool output.
+                (default: :obj:`False`)
+        Returns:
+            ToolCallingRecord: A struct containing information about
+            this tool call.
         """
         assist_msg = FunctionCallingMessage(
             role_name=self.role_name,
@@ -2036,6 +2373,7 @@ class ChatAgent(BaseAgent):
             func_name=func_name,
             result=result,
             tool_call_id=tool_call_id,
+            mask_output=mask_output,
         )
         # Use precise timestamps to ensure correct ordering
@@ -2140,6 +2478,7 @@ class ChatAgent(BaseAgent):
             ),
             max_iteration=self.max_iteration,
             stop_event=self.stop_event,
+            pause_event=self.pause_event,
         )
         # Copy memory if requested

camel/messages/base.py CHANGED Viewed

@@ -437,12 +437,8 @@ class BaseMessage:
         if self.image_list and len(self.image_list) > 0:
             for image in self.image_list:
                 if image.format is None:
-                    raise ValueError(
-                        f"Image's `format` is `None`, please "
-                        f"transform the `PIL.Image.Image` to  one of "
-                        f"following supported formats, such as "
-                        f"{list(OpenAIImageType)}"
-                    )
+                    # Set default format to PNG as fallback
+                    image.format = 'PNG'
                 image_type: str = image.format.lower()
                 if image_type not in OpenAIImageType:

camel/messages/func_message.py CHANGED Viewed

@@ -47,12 +47,16 @@ class FunctionCallingMessage(BaseMessage):
             (default: :obj:`None`)
         tool_call_id (Optional[str]): The ID of the tool call, if available.
             (default: :obj:`None`)
+        mask_output (Optional[bool]): Whether to return a sanitized placeholder
+            instead of the raw tool output.
+            (default: :obj:`False`)
     """
     func_name: Optional[str] = None
     args: Optional[Dict] = None
     result: Optional[Any] = None
     tool_call_id: Optional[str] = None
+    mask_output: Optional[bool] = False
     def to_openai_message(
         self,
@@ -105,10 +109,13 @@ class FunctionCallingMessage(BaseMessage):
             # This is a function response
             # TODO: Allow for more flexible setting of tool role,
             #  optionally to be the same as assistant messages
-            content = function_format.format_tool_response(
-                self.func_name,  # type: ignore[arg-type]
-                self.result,  # type: ignore[arg-type]
-            )
+            if self.mask_output:
+                content = "[MASKED]"
+            else:
+                content = function_format.format_tool_response(
+                    self.func_name,  # type: ignore[arg-type]
+                    self.result,  # type: ignore[arg-type]
+                )
             return ShareGPTMessage(from_="tool", value=content)  # type: ignore[call-arg]
     def to_openai_assistant_message(self) -> OpenAIAssistantMessage:
@@ -154,10 +161,30 @@ class FunctionCallingMessage(BaseMessage):
                 " due to missing function name."
             )
-        result_content = str(self.result)
+        if self.mask_output:
+            result_content = "[MASKED]"
+        else:
+            result_content = str(self.result)
         return {
             "role": "tool",
             "content": result_content,
             "tool_call_id": self.tool_call_id or "null",
         }
+    def to_dict(self) -> Dict:
+        r"""Converts the message to a dictionary.
+        Returns:
+            dict: The converted dictionary.
+        """
+        base = super().to_dict()
+        base["func_name"] = self.func_name
+        if self.args is not None:
+            base["args"] = self.args
+        if self.result is not None:
+            base["result"] = self.result
+        if self.tool_call_id is not None:
+            base["tool_call_id"] = self.tool_call_id
+        base["mask_output"] = self.mask_output
+        return base

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl