PyPI - camel-ai - Versions diffs - 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl - Mend

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (32)

camel/__init__.py +1 -1
camel/agents/_types.py +6 -2
camel/agents/chat_agent.py +357 -18
camel/messages/base.py +2 -6
camel/messages/func_message.py +32 -5
camel/services/agent_openapi_server.py +380 -0
camel/societies/workforce/single_agent_worker.py +1 -5
camel/societies/workforce/workforce.py +68 -8
camel/tasks/task.py +2 -2
camel/toolkits/__init__.py +2 -2
camel/toolkits/craw4ai_toolkit.py +27 -7
camel/toolkits/file_write_toolkit.py +110 -31
camel/toolkits/human_toolkit.py +19 -14
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
camel/toolkits/jina_reranker_toolkit.py +3 -4
camel/toolkits/terminal_toolkit.py +189 -48
camel/toolkits/video_download_toolkit.py +1 -2
camel/types/agents/tool_calling_record.py +4 -1
camel/types/enums.py +24 -24
camel/utils/message_summarizer.py +148 -0
camel/utils/tool_result.py +44 -0
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
{camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0

camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py RENAMED Viewed

@@ -12,7 +12,7 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 if TYPE_CHECKING:
     from playwright.async_api import Page
@@ -64,7 +64,17 @@ class PageSnapshot:
             )
             logger.debug("Capturing page snapshot …")
-            snapshot_text = await self._get_snapshot_direct()
+            snapshot_result = await self._get_snapshot_direct()
+            # Extract snapshot text from the unified analyzer result
+            if (
+                isinstance(snapshot_result, dict)
+                and 'snapshotText' in snapshot_result
+            ):
+                snapshot_text = snapshot_result['snapshotText']
+            else:
+                snapshot_text = snapshot_result
             formatted = self._format_snapshot(snapshot_text or "<empty>")
             output = formatted
@@ -99,7 +109,9 @@ class PageSnapshot:
     # ------------------------------------------------------------------
     _snapshot_js_cache: Optional[str] = None  # class-level cache
-    async def _get_snapshot_direct(self) -> Optional[str]:
+    async def _get_snapshot_direct(
+        self,
+    ) -> Optional[Union[str, Dict[str, Any]]]:
         r"""Evaluate the snapshot-extraction JS with simple retry logic.
         Playwright throws *Execution context was destroyed* when a new page
@@ -110,7 +122,7 @@ class PageSnapshot:
         # Load JS once and cache it at class level
         if PageSnapshot._snapshot_js_cache is None:
-            js_path = Path(__file__).parent / "snapshot.js"
+            js_path = Path(__file__).parent / "unified_analyzer.js"
             PageSnapshot._snapshot_js_cache = js_path.read_text(
                 encoding="utf-8"
             )

camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} RENAMED Viewed

@@ -1,6 +1,13 @@
 (() => {
-    // Playwright's snapshot logic focuses on semantics and visibility, not arbitrary limits.
-    // We will first build a semantic tree in memory, then render it.
+    // Unified analyzer that combines visual and structural analysis
+    // Preserves complete snapshot.js logic while adding visual coordinate information
+    let refCounter = 1;
+    function generateRef() {
+        return `e${refCounter++}`;
+    }
+    // === Complete snapshot.js logic preservation ===
     function isVisible(node) {
         if (node.nodeType !== Node.ELEMENT_NODE) return true;
@@ -70,13 +77,9 @@
         return result;
     }
-    let refCounter = 1;
-    function generateRef() {
-        return `e${refCounter++}`;
-    }
     /**
      * Phase 1: Build an in-memory representation of the accessibility tree.
+     * Complete preservation of snapshot.js buildAriaTree logic
      */
     function buildAriaTree(rootElement) {
         const visited = new Set();
@@ -153,9 +156,34 @@
                 }
             }
-            // FIX: If an element's name is the same as its only text child, remove the redundant child.
-            if (ariaNode && ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
-                ariaNode.children = [];
+            // FIX: Remove redundant text children that match the element's name
+            if (ariaNode && ariaNode.children.length > 0) {
+                // Remove text children that are the same as the parent's name or are contained in it
+                ariaNode.children = ariaNode.children.filter(child => {
+                    if (typeof child === 'string') {
+                        const childText = child.trim();
+                        const parentName = ariaNode.name.trim();
+                        // Remove if text child exactly matches parent name
+                        if (childText === parentName) {
+                            return false;
+                        }
+                        // Also remove if the child text is completely contained in parent name
+                        // and represents a significant portion (to avoid removing important partial text)
+                        if (childText.length > 3 && parentName.includes(childText)) {
+                            return false;
+                        }
+                        return true;
+                    }
+                    return true;
+                });
+                // If after filtering, we have only one text child that equals the name, remove it
+                if (ariaNode.children.length === 1 && typeof ariaNode.children[0] === 'string' && ariaNode.name === ariaNode.children[0]) {
+                    ariaNode.children = [];
+                }
             }
         }
@@ -166,7 +194,7 @@
     /**
      * Phase 2: Normalize the tree by removing redundant generic wrappers.
-     * This is a key optimization in Playwright to simplify the structure.
+     * Complete preservation of snapshot.js normalizeTree logic
      */
     function normalizeTree(node) {
         if (typeof node === 'string') return [node];
@@ -178,6 +206,24 @@
         node.children = newChildren;
         // Remove child elements that have the same name as their parent
+        const filteredChildren = [];
+        for (const child of node.children) {
+            if (typeof child !== 'string' && child.name && node.name) {
+                const childName = child.name.trim();
+                const parentName = node.name.trim();
+                if (childName === parentName) {
+                    // If child has same name as parent, merge its children into parent
+                    filteredChildren.push(...(child.children || []));
+                } else {
+                    filteredChildren.push(child);
+                }
+            } else {
+                filteredChildren.push(child);
+            }
+        }
+        node.children = filteredChildren;
+        // Also handle the case where we have only one child with same name
         if (node.children.length === 1 && typeof node.children[0] !== 'string') {
             const child = node.children[0];
             if (child.name && node.name && child.name.trim() === node.name.trim()) {
@@ -195,9 +241,9 @@
         return [node];
     }
     /**
      * Phase 3: Render the normalized tree into the final string format.
+     * Complete preservation of snapshot.js renderTree logic
      */
     function renderTree(node, indent = '') {
         const lines = [];
@@ -263,6 +309,116 @@
         return lines;
     }
-    const outputLines = processDocument(document);
-    return outputLines.join('\n');
-})();
+    // === Visual analysis functions from page_script.js ===
+    // From page_script.js - check if element is topmost at coordinates
+    function isTopmost(element, x, y) {
+        let hit = document.elementFromPoint(x, y);
+        if (hit === null) return true;
+        while (hit) {
+            if (hit == element) return true;
+            hit = hit.parentNode;
+        }
+        return false;
+    }
+    // From page_script.js - get visual coordinates
+    function getElementCoordinates(element) {
+        let rects = element.getClientRects();
+        let scale = window.devicePixelRatio || 1;
+        let validRects = [];
+        for (const rect of rects) {
+            let x = rect.left + rect.width / 2;
+            let y = rect.top + rect.height / 2;
+            if (isTopmost(element, x, y)) {
+                validRects.push({
+                    x: rect.x * scale,
+                    y: rect.y * scale,
+                    width: rect.width * scale,
+                    height: rect.height * scale,
+                    top: rect.top * scale,
+                    left: rect.left * scale,
+                    right: rect.right * scale,
+                    bottom: rect.bottom * scale
+                });
+            }
+        }
+        return validRects;
+    }
+    // === Unified analysis function ===
+    function collectElementsFromTree(node, elementsMap) {
+        if (typeof node === 'string') return;
+        if (node.element && node.ref) {
+            // Get visual coordinates for this element
+            const coordinates = getElementCoordinates(node.element);
+            // Store comprehensive element information
+            elementsMap[node.ref] = {
+                // Structural information (preserved from snapshot.js)
+                role: node.role,
+                name: node.name,
+                tagName: node.element.tagName.toLowerCase(),
+                disabled: node.disabled,
+                checked: node.checked,
+                expanded: node.expanded,
+                // Visual information (from page_script.js)
+                coordinates: coordinates,
+                // Additional metadata
+                href: node.element.href || null,
+                value: node.element.value || null,
+                placeholder: node.element.placeholder || null,
+                scrollable: node.element.scrollHeight > node.element.clientHeight
+            };
+        }
+        // Recursively process children
+        if (node.children) {
+            for (const child of node.children) {
+                collectElementsFromTree(child, elementsMap);
+            }
+        }
+    }
+    function analyzePageElements() {
+        // Generate the complete structured snapshot using original snapshot.js logic
+        const outputLines = processDocument(document);
+        const snapshotText = outputLines.join('\n');
+        // Build the tree again to collect element information with visual data
+        textCache.clear();
+        refCounter = 1; // Reset counter to match snapshot generation
+        let tree = buildAriaTree(document.body);
+        [tree] = normalizeTree(tree);
+        const elementsMap = {};
+        collectElementsFromTree(tree, elementsMap);
+        const result = {
+            url: window.location.href,
+            elements: elementsMap,
+            snapshotText: snapshotText,
+            metadata: {
+                timestamp: new Date().toISOString(),
+                elementCount: Object.keys(elementsMap).length,
+                screenInfo: {
+                    width: window.innerWidth,
+                    height: window.innerHeight,
+                    devicePixelRatio: window.devicePixelRatio || 1
+                }
+            }
+        };
+        return result;
+    }
+    // Execute analysis and return result
+    return analyzePageElements();
+})();

camel/toolkits/jina_reranker_toolkit.py CHANGED Viewed

@@ -34,7 +34,7 @@ class JinaRerankerToolkit(BaseToolkit):
     def __init__(
         self,
         timeout: Optional[float] = None,
-        model_name: Optional[str] = "jinaai/jina-reranker-m0",
+        model_name: str = "jinaai/jina-reranker-m0",
         device: Optional[str] = None,
         use_api: bool = True,
     ) -> None:
@@ -44,9 +44,8 @@ class JinaRerankerToolkit(BaseToolkit):
             timeout (Optional[float]): The timeout value for API requests
                 in seconds. If None, no timeout is applied.
                 (default: :obj:`None`)
-            model_name (Optional[str]): The reranker model name. If None,
-                will use the default model.
-                (default: :obj:`None`)
+            model_name (str): The reranker model name.
+                (default: :obj:`"jinaai/jina-reranker-m0"`)
             device (Optional[str]): Device to load the model on. If None,
                 will use CUDA if available, otherwise CPU.
                 Only effective when use_api=False.

camel/toolkits/terminal_toolkit.py CHANGED Viewed

@@ -84,6 +84,7 @@ class TerminalToolkit(BaseToolkit):
         self._file_initialized = False
         self.cloned_env_path = None
         self.use_shell_mode = use_shell_mode
+        self._human_takeover_active = False
         self.python_executable = sys.executable
         self.is_macos = platform.system() == 'Darwin'
@@ -705,59 +706,35 @@ class TerminalToolkit(BaseToolkit):
                 elif command.startswith('pip'):
                     command = command.replace('pip', pip_path, 1)
-            if self.is_macos:
-                # Type safe version - macOS uses subprocess.run
-                process = subprocess.run(
-                    command,
-                    shell=True,
-                    cwd=self.working_dir,
-                    capture_output=True,
-                    text=True,
-                    env=os.environ.copy(),
-                )
-                # Process the output
-                output = process.stdout or ""
-                if process.stderr:
-                    output += f"\nStderr Output:\n{process.stderr}"
-                # Update session information and terminal
-                self.shell_sessions[id]["output"] = output
-                self._update_terminal_output(output + "\n")
-                return output
-            else:
-                # Non-macOS systems use the Popen method
-                proc = subprocess.Popen(
-                    command,
-                    shell=True,
-                    cwd=self.working_dir,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    stdin=subprocess.PIPE,
-                    text=True,
-                    bufsize=1,
-                    universal_newlines=True,
-                    env=os.environ.copy(),
-                )
+            proc = subprocess.Popen(
+                command,
+                shell=True,
+                cwd=self.working_dir,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                stdin=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                universal_newlines=True,
+                env=os.environ.copy(),
+            )
-                # Store the process and mark it as running
-                self.shell_sessions[id]["process"] = proc
-                self.shell_sessions[id]["running"] = True
+            # Store the process and mark it as running
+            self.shell_sessions[id]["process"] = proc
+            self.shell_sessions[id]["running"] = True
-                # Get output
-                stdout, stderr = proc.communicate()
+            # Get output
+            stdout, stderr = proc.communicate()
-                output = stdout or ""
-                if stderr:
-                    output += f"\nStderr Output:\n{stderr}"
+            output = stdout or ""
+            if stderr:
+                output += f"\nStderr Output:\n{stderr}"
-                # Update session information and terminal
-                self.shell_sessions[id]["output"] = output
-                self._update_terminal_output(output + "\n")
+            # Update session information and terminal
+            self.shell_sessions[id]["output"] = output
+            self._update_terminal_output(output + "\n")
-                return output
+            return output
         except Exception as e:
             error_msg = f"Command execution error: {e!s}"
@@ -961,6 +938,169 @@ class TerminalToolkit(BaseToolkit):
             logger.error(f"Error killing process: {e}")
             return f"Error killing process: {e!s}"
+    def ask_user_for_help(self, id: str) -> str:
+        r"""Pauses agent execution to ask a human for help in the terminal.
+        This function should be called when an agent is stuck or needs
+        assistance with a task that requires manual intervention (e.g.,
+        solving a CAPTCHA or complex debugging). The human will take over the
+        specified terminal session to execute commands and then return control
+        to the agent.
+        Args:
+            id (str): Identifier of the shell session for the human to
+                interact with. If the session does not yet exist, it will be
+                created automatically.
+        Returns:
+            str: A status message indicating that the human has finished,
+                including the number of commands executed.
+        """
+        # Input validation
+        if not id or not isinstance(id, str):
+            return "Error: Invalid session ID provided"
+        # Prevent concurrent human takeovers
+        if (
+            hasattr(self, '_human_takeover_active')
+            and self._human_takeover_active
+        ):
+            return "Error: Human takeover already in progress"
+        try:
+            self._human_takeover_active = True
+            # Ensure the session exists so that the human can reuse it
+            if id not in self.shell_sessions:
+                self.shell_sessions[id] = {
+                    "process": None,
+                    "output": "",
+                    "running": False,
+                }
+            command_count = 0
+            error_occurred = False
+            # Create clear banner message for user
+            takeover_banner = (
+                f"\n{'='*60}\n"
+                f"🤖 CAMEL Agent needs human help! Session: {id}\n"
+                f"📂 Working directory: {self.working_dir}\n"
+                f"{'='*60}\n"
+                f"💡 Type commands or '/exit' to return control to agent.\n"
+                f"{'='*60}\n"
+            )
+            # Print once to console for immediate visibility
+            print(takeover_banner, flush=True)
+            # Log for terminal output tracking
+            self._update_terminal_output(takeover_banner)
+            # Helper flag + event for coordination
+            done_event = threading.Event()
+            def _human_loop() -> None:
+                r"""Blocking loop that forwards human input to shell_exec."""
+                nonlocal command_count, error_occurred
+                try:
+                    while True:
+                        try:
+                            # Clear, descriptive prompt for user input
+                            user_cmd = input(f"🧑‍💻 [{id}]> ")
+                            if (
+                                user_cmd.strip()
+                            ):  # Only count non-empty commands
+                                command_count += 1
+                        except EOFError:
+                            # e.g. Ctrl_D / stdin closed, treat as exit.
+                            break
+                        except (KeyboardInterrupt, Exception) as e:
+                            logger.warning(
+                                f"Input error during human takeover: {e}"
+                            )
+                            error_occurred = True
+                            break
+                        if user_cmd.strip() in {"/exit", "exit", "quit"}:
+                            break
+                        try:
+                            exec_result = self.shell_exec(id, user_cmd)
+                            # Show the result immediately to the user
+                            if exec_result.strip():
+                                print(exec_result)
+                            logger.info(
+                                f"Human command executed: {user_cmd[:50]}..."
+                            )
+                            # Auto-exit after successful command
+                            break
+                        except Exception as e:
+                            error_msg = f"Error executing command: {e}"
+                            logger.error(f"Error executing human command: {e}")
+                            print(error_msg)  # Show error to user immediately
+                            self._update_terminal_output(f"{error_msg}\n")
+                            error_occurred = True
+                except Exception as e:
+                    logger.error(f"Unexpected error in human loop: {e}")
+                    error_occurred = True
+                finally:
+                    # Notify completion clearly
+                    finish_msg = (
+                        f"\n{'='*60}\n"
+                        f"✅ Human assistance completed! "
+                        f"Commands: {command_count}\n"
+                        f"🤖 Returning control to CAMEL agent...\n"
+                        f"{'='*60}\n"
+                    )
+                    print(finish_msg, flush=True)
+                    self._update_terminal_output(finish_msg)
+                    done_event.set()
+            # Start interactive thread (non-daemon for proper cleanup)
+            thread = threading.Thread(target=_human_loop, daemon=False)
+            thread.start()
+            # Block until human signals completion with timeout
+            if done_event.wait(timeout=600):  # 10 minutes timeout
+                thread.join(timeout=10)  # Give thread time to cleanup
+                # Generate detailed status message
+                status = "completed successfully"
+                if error_occurred:
+                    status = "completed with some errors"
+                result_msg = (
+                    f"Human assistance {status} for session '{id}'. "
+                    f"Total commands executed: {command_count}. "
+                    f"Working directory: {self.working_dir}"
+                )
+                logger.info(result_msg)
+                return result_msg
+            else:
+                timeout_msg = (
+                    f"Human takeover for session '{id}' timed out after 10 "
+                    "minutes"
+                )
+                logger.warning(timeout_msg)
+                return timeout_msg
+        except Exception as e:
+            error_msg = f"Error during human takeover for session '{id}': {e}"
+            logger.error(error_msg)
+            # Notify user of the error clearly
+            error_banner = (
+                f"\n{'='*60}\n"
+                f"❌ Error in human takeover! Session: {id}\n"
+                f"❗ {e}\n"
+                f"{'='*60}\n"
+            )
+            print(error_banner, flush=True)
+            return error_msg
+        finally:
+            # Always reset the flag
+            self._human_takeover_active = False
     def __del__(self):
         r"""Clean up resources when the object is being destroyed.
         Terminates all running processes and closes any open file handles.
@@ -1042,4 +1182,5 @@ class TerminalToolkit(BaseToolkit):
             FunctionTool(self.shell_wait),
             FunctionTool(self.shell_write_to_process),
             FunctionTool(self.shell_kill_process),
+            FunctionTool(self.ask_user_for_help),
         ]

camel/toolkits/video_download_toolkit.py CHANGED Viewed

@@ -26,7 +26,7 @@ from PIL import Image
 from camel.logger import get_logger
 from camel.toolkits.base import BaseToolkit
 from camel.toolkits.function_tool import FunctionTool
-from camel.utils import MCPServer, dependencies_required
+from camel.utils import dependencies_required
 logger = get_logger(__name__)
@@ -57,7 +57,6 @@ def _capture_screenshot(video_file: str, timestamp: float) -> Image.Image:
     return Image.open(io.BytesIO(out))
-@MCPServer()
 class VideoDownloaderToolkit(BaseToolkit):
     r"""A class for downloading videos and optionally splitting them into
     chunks.

camel/types/agents/tool_calling_record.py CHANGED Viewed

@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from typing import Any, Dict
+from typing import Any, Dict, List, Optional
 from pydantic import BaseModel
@@ -24,12 +24,15 @@ class ToolCallingRecord(BaseModel):
         args (Dict[str, Any]): The dictionary of arguments passed to the tool.
         result (Any): The execution result of calling this tool.
         tool_call_id (str): The ID of the tool call, if available.
+        images (Optional[List[str]]): List of base64-encoded images returned
+            by the tool, if any.
     """
     tool_name: str
     args: Dict[str, Any]
     result: Any
     tool_call_id: str
+    images: Optional[List[str]] = None
     def __str__(self) -> str:
         r"""Overridden version of the string function.

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl