PyPI - camel-ai - Versions diffs - 0.2.74a5__py3-none-any.whl → 0.2.75a2__py3-none-any.whl - Mend

camel-ai 0.2.74a5__py3-none-any.whl → 0.2.75a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (68) hide show

camel/__init__.py +1 -1
camel/agents/chat_agent.py +2 -2
camel/interpreters/e2b_interpreter.py +34 -1
camel/models/aiml_model.py +1 -16
camel/models/anthropic_model.py +6 -22
camel/models/aws_bedrock_model.py +1 -16
camel/models/azure_openai_model.py +1 -16
camel/models/base_model.py +0 -12
camel/models/cohere_model.py +1 -16
camel/models/crynux_model.py +1 -16
camel/models/deepseek_model.py +1 -16
camel/models/gemini_model.py +1 -16
camel/models/groq_model.py +1 -17
camel/models/internlm_model.py +1 -16
camel/models/litellm_model.py +1 -16
camel/models/lmstudio_model.py +1 -17
camel/models/mistral_model.py +1 -16
camel/models/modelscope_model.py +1 -16
camel/models/moonshot_model.py +6 -22
camel/models/nemotron_model.py +0 -5
camel/models/netmind_model.py +1 -16
camel/models/novita_model.py +1 -16
camel/models/nvidia_model.py +1 -16
camel/models/ollama_model.py +1 -16
camel/models/openai_compatible_model.py +0 -3
camel/models/openai_model.py +1 -16
camel/models/openrouter_model.py +1 -17
camel/models/ppio_model.py +1 -16
camel/models/qianfan_model.py +1 -16
camel/models/qwen_model.py +1 -16
camel/models/reka_model.py +1 -16
camel/models/samba_model.py +0 -32
camel/models/sglang_model.py +1 -16
camel/models/siliconflow_model.py +1 -16
camel/models/stub_model.py +0 -4
camel/models/togetherai_model.py +1 -16
camel/models/vllm_model.py +1 -16
camel/models/volcano_model.py +0 -17
camel/models/watsonx_model.py +1 -16
camel/models/yi_model.py +1 -16
camel/models/zhipuai_model.py +1 -16
camel/societies/workforce/prompts.py +1 -8
camel/toolkits/__init__.py +0 -2
camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
camel/toolkits/search_toolkit.py +140 -27
camel/types/__init__.py +2 -2
camel/types/enums.py +20 -1
camel/types/openai_types.py +2 -2
camel/utils/mcp.py +2 -2
camel/utils/token_counting.py +18 -3
{camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/METADATA +6 -6
{camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/RECORD +67 -68
camel/toolkits/openai_agent_toolkit.py +0 -135
{camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/WHEEL +0 -0
{camel_ai-0.2.74a5.dist-info → camel_ai-0.2.75a2.dist-info}/licenses/LICENSE +0 -0

camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py CHANGED Viewed

@@ -73,11 +73,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
         "browser_select",
         "browser_scroll",
         "browser_enter",
+        "browser_mouse_control",
+        "browser_mouse_drag",
+        "browser_press_key",
         "browser_wait_user",
         "browser_solve_task",
         "browser_switch_tab",
         "browser_close_tab",
         "browser_get_tab_info",
+        "browser_console_view",
+        "browser_console_exec",
     ]
     def __init__(
@@ -99,6 +104,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
         screenshot_timeout: Optional[int] = None,
         page_stability_timeout: Optional[int] = None,
         dom_content_loaded_timeout: Optional[int] = None,
+        viewport_limit: bool = False,
     ) -> None:
         r"""Initialize the HybridBrowserToolkit.
@@ -182,6 +188,10 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
                 HYBRID_BROWSER_DOM_CONTENT_LOADED_TIMEOUT or defaults to
                 5000ms.
                 Defaults to `None`.
+            viewport_limit (bool): When True, only return snapshot results
+                visible in the current viewport. When False, return all
+                elements on the page regardless of visibility.
+                Defaults to `False`.
         """
         super().__init__()
         RegisteredAgentToolkit.__init__(self)
@@ -193,6 +203,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
         self._browser_log_to_file = browser_log_to_file
         self._default_start_url = default_start_url
         self._session_id = session_id or "default"
+        self._viewport_limit = viewport_limit
         # Store timeout configuration
         self._default_timeout = default_timeout
@@ -309,7 +320,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
                     # Try to close browser with a timeout to prevent hanging
                     try:
                         loop.run_until_complete(
-                            asyncio.wait_for(self.close_browser(), timeout=2.0)
+                            asyncio.wait_for(self.browser_close(), timeout=2.0)
                         )
                     except asyncio.TimeoutError:
                         pass  # Skip cleanup if it takes too long
@@ -550,7 +561,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
             )
     async def _get_unified_analysis(
-        self, max_retries: int = 3
+        self, max_retries: int = 3, viewport_limit: Optional[bool] = None
     ) -> Dict[str, Any]:
         r"""Get unified analysis data from the page with retry mechanism for
         navigation issues."""
@@ -573,7 +584,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
                     # Don't fail if DOM wait times out
                     pass
-                result = await page.evaluate(self._unified_script)
+                # Use instance viewport_limit if parameter not provided
+                use_viewport_limit = (
+                    viewport_limit
+                    if viewport_limit is not None
+                    else self._viewport_limit
+                )
+                result = await page.evaluate(
+                    self._unified_script, use_viewport_limit
+                )
                 if not isinstance(result, dict):
                     logger.warning(f"Invalid result type: {type(result)}")
@@ -1703,6 +1722,149 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
         return result
+    @action_logger
+    async def browser_mouse_control(
+        self, *, control: str, x: float, y: float
+    ) -> Dict[str, Any]:
+        r"""Control the mouse to interact with browser with x, y coordinates
+        Args:
+            control (str): The action to perform: 'click', 'right_click'
+            or 'dblclick'.
+            x (float): x-coordinate for the control action.
+            y (float): y-coordinate for the control action.
+        Returns:
+            Dict[str, Any]: A dictionary with the result of the action:
+                - "result" (str): Confirmation of the action.
+                - "snapshot" (str): A new page snapshot.
+                - "tabs" (List[Dict]): Information about all open tabs.
+                - "current_tab" (int): Index of the active tab.
+                - "total_tabs" (int): Total number of open tabs.
+        """
+        if control not in ("click", "right_click", "dblclick"):
+            tab_info = await self._get_tab_info_for_output()
+            return {
+                "result": "Error: supported control actions are "
+                "'click' or 'dblclick'",
+                "snapshot": "",
+                **tab_info,
+            }
+        action = {"type": "mouse_control", "control": control, "x": x, "y": y}
+        result = await self._exec_with_snapshot(action)
+        # Add tab information to the result
+        tab_info = await self._get_tab_info_for_output()
+        result.update(tab_info)
+        return result
+    @action_logger
+    async def browser_mouse_drag(
+        self, *, from_ref: str, to_ref: str
+    ) -> Dict[str, Any]:
+        r"""Control the mouse to drag and drop in the browser using ref IDs.
+        Args:
+            from_ref (str): The `ref` ID of the source element to drag from.
+            to_ref (str): The `ref` ID of the target element to drag to.
+        Returns:
+            Dict[str, Any]: A dictionary with the result of the action:
+                - "result" (str): Confirmation of the action.
+                - "snapshot" (str): A new page snapshot.
+                - "tabs" (List[Dict]): Information about all open tabs.
+                - "current_tab" (int): Index of the active tab.
+                - "total_tabs" (int): Total number of open tabs.
+        """
+        # Validate refs
+        self._validate_ref(from_ref, "drag source")
+        self._validate_ref(to_ref, "drag target")
+        # Get element analysis to find coordinates
+        analysis = await self._get_unified_analysis()
+        elements = analysis.get("elements", {})
+        if from_ref not in elements:
+            logger.error(
+                f"Error: Source element reference '{from_ref}' not found."
+            )
+            snapshot = self._format_snapshot_from_analysis(analysis)
+            tab_info = await self._get_tab_info_for_output()
+            return {
+                "result": (
+                    f"Error: Source element reference '{from_ref}' not found."
+                ),
+                "snapshot": snapshot,
+                **tab_info,
+            }
+        if to_ref not in elements:
+            logger.error(
+                f"Error: Target element reference '{to_ref}' not found."
+            )
+            snapshot = self._format_snapshot_from_analysis(analysis)
+            tab_info = await self._get_tab_info_for_output()
+            return {
+                "result": (
+                    f"Error: Target element reference '{to_ref}' not found."
+                ),
+                "snapshot": snapshot,
+                **tab_info,
+            }
+        action = {
+            "type": "mouse_drag",
+            "from_ref": from_ref,
+            "to_ref": to_ref,
+        }
+        result = await self._exec_with_snapshot(action)
+        # Add tab information to the result
+        tab_info = await self._get_tab_info_for_output()
+        result.update(tab_info)
+        return result
+    @action_logger
+    async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
+        r"""Press key and key combinations.
+        Supports single key press or combination of keys by concatenating
+        them with '+' separator.
+        Args:
+            keys (List[str]): key or list of keys.
+        Returns:
+            Dict[str, Any]: A dictionary with the result of the action:
+                - "result" (str): Confirmation of the action.
+                - "snapshot" (str): A new page snapshot.
+                - "tabs" (List[Dict]): Information about all open tabs.
+                - "current_tab" (int): Index of the active tab.
+                - "total_tabs" (int): Total number of open tabs.
+        """
+        if not isinstance(keys, list) or not all(
+            isinstance(item, str) for item in keys
+        ):
+            tab_info = await self._get_tab_info_for_output()
+            return {
+                "result": "Error: Expected keys as a list of strings.",
+                "snapshot": "",
+                **tab_info,
+            }
+        action = {"type": "press_key", "keys": keys}
+        result = await self._exec_with_snapshot(action)
+        # Add tab information to the result
+        tab_info = await self._get_tab_info_for_output()
+        result.update(tab_info)
+        return result
     @action_logger
     async def browser_wait_user(
         self, timeout_sec: Optional[float] = None
@@ -1830,6 +1992,148 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
         await agent.process_command(task_prompt, max_steps=max_steps)
         return "Task processing finished - see stdout for detailed trace."
+    @action_logger
+    async def browser_console_view(self) -> Dict[str, Any]:
+        r"""View current page console logs.
+        Returns:
+            Dict[str, Any]: A dictionary with the result of the action:
+                - console_messages (List[Dict]) : collection of logs from the
+                browser console
+        """
+        try:
+            logs = await self._session.get_console_logs()
+            # make output JSON serializable
+            return {"console_messages": list(logs)}
+        except Exception as e:
+            logger.warning(f"Failed to retrieve logs: {e}")
+            return {"console_messages": []}
+    async def browser_console_exec(self, code: str) -> Dict[str, Any]:
+        r"""Execute javascript code in the console of the current page and get
+        results.
+        Args:
+            code (str): JavaScript code for execution.
+        Returns:
+            Dict[str, Any]: A dictionary with the result of the action:
+                - "result" (str): Result of the action.
+                - "console_output" (List[str]): Console log outputs during
+                  execution.
+                - "snapshot" (str): A new page snapshot.
+                - "tabs" (List[Dict]): Information about all open tabs.
+                - "current_tab" (int): Index of the active tab.
+                - "total_tabs" (int): Total number of open tabs.
+        """
+        page = await self._require_page()
+        try:
+            logger.info("Executing JavaScript code in browser console.")
+            exec_start = time.time()
+            # Wrap the code to capture console.log output and handle
+            # expressions
+            wrapped_code = (
+                """
+                (function() {
+                    const _logs = [];
+                    const originalLog = console.log;
+                    console.log = function(...args) {
+                        _logs.push(args.map(arg => {
+                            try {
+                                return typeof arg === 'object' ?
+                                    JSON.stringify(arg) : String(arg);
+                            } catch (e) {
+                                return String(arg);
+                            }
+                        }).join(' '));
+                        originalLog.apply(console, args);
+                    };
+                    let result;
+                    try {
+                        // First try to evaluate as an expression
+                        // (like browser console)
+                        result = eval("""
+                + repr(code)
+                + """);
+                    } catch (e) {
+                        // If that fails, execute as statements
+                        try {
+                            result = (function() { """
+                + code
+                + """ })();
+                        } catch (error) {
+                            console.log = originalLog;
+                            throw error;
+                        }
+                    }
+                    console.log = originalLog;
+                    return { result, logs: _logs };
+                })()
+            """
+            )
+            eval_result = await page.evaluate(wrapped_code)
+            result = eval_result.get('result')
+            console_logs = eval_result.get('logs', [])
+            exec_time = time.time() - exec_start
+            logger.info(f"Code execution completed in {exec_time:.2f}s.")
+            import asyncio
+            import json
+            await asyncio.sleep(0.2)
+            # Get snapshot
+            logger.info("Capturing page snapshot after code execution.")
+            snapshot_start = time.time()
+            snapshot = await self._session.get_snapshot(
+                force_refresh=True, diff_only=False
+            )
+            snapshot_time = time.time() - snapshot_start
+            logger.info(
+                f"Code execution snapshot captured in " f"{snapshot_time:.2f}s"
+            )
+            # Get tab information
+            tab_info = await self._get_tab_info_for_output()
+            # Properly serialize the result
+            try:
+                result_str = json.dumps(result, indent=2)
+            except (TypeError, ValueError):
+                result_str = str(result)
+            return {
+                "result": f"Code execution result: {result_str}",
+                "console_output": console_logs,
+                "snapshot": snapshot,
+                **tab_info,
+            }
+        except Exception as e:
+            logger.warning(f"Code execution failed: {e}")
+            # Get tab information for error case
+            try:
+                tab_info = await self._get_tab_info_for_output()
+            except Exception:
+                tab_info = {
+                    "tabs": [],
+                    "current_tab": 0,
+                    "total_tabs": 0,
+                }
+            return {
+                "result": f"Code execution failed: {e}",
+                "console_output": [],
+                "snapshot": "",
+                **tab_info,
+            }
     def get_log_summary(self) -> Dict[str, Any]:
         r"""Get a summary of logged actions."""
         if not self.log_buffer:
@@ -2045,11 +2349,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
             "browser_select": self.browser_select,
             "browser_scroll": self.browser_scroll,
             "browser_enter": self.browser_enter,
+            "browser_mouse_control": self.browser_mouse_control,
+            "browser_mouse_drag": self.browser_mouse_drag,
+            "browser_press_key": self.browser_press_key,
             "browser_wait_user": self.browser_wait_user,
             "browser_solve_task": self.browser_solve_task,
             "browser_switch_tab": self.browser_switch_tab,
             "browser_close_tab": self.browser_close_tab,
             "browser_get_tab_info": self.browser_get_tab_info,
+            "browser_console_view": self.browser_console_view,
+            "browser_console_exec": self.browser_console_exec,
         }
         enabled_tools = []

camel/toolkits/hybrid_browser_toolkit_py/snapshot.py CHANGED Viewed

@@ -43,7 +43,11 @@ class PageSnapshot:
     # Public API
     # ---------------------------------------------------------------------
     async def capture(
-        self, *, force_refresh: bool = False, diff_only: bool = False
+        self,
+        *,
+        force_refresh: bool = False,
+        diff_only: bool = False,
+        viewport_limit: bool = False,
     ) -> str:
         """Return current snapshot or just the diff to previous one."""
         try:
@@ -65,7 +69,9 @@ class PageSnapshot:
             )
             logger.debug("Capturing page snapshot …")
-            snapshot_result = await self._get_snapshot_direct()
+            snapshot_result = await self._get_snapshot_direct(
+                viewport_limit=viewport_limit
+            )
             # Extract snapshot text from the unified analyzer result
             if (
@@ -111,7 +117,7 @@ class PageSnapshot:
     _snapshot_js_cache: Optional[str] = None  # class-level cache
     async def _get_snapshot_direct(
-        self,
+        self, viewport_limit: bool = False
     ) -> Optional[Union[str, Dict[str, Any]]]:
         r"""Evaluate the snapshot-extraction JS with simple retry logic.
@@ -133,7 +139,7 @@ class PageSnapshot:
         retries: int = 3
         while retries > 0:
             try:
-                return await self.page.evaluate(js_code)
+                return await self.page.evaluate(js_code, viewport_limit)
             except Exception as e:
                 msg = str(e)

camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js CHANGED Viewed

@@ -1,4 +1,4 @@
-(() => {
+((viewport_limit = false) => {
     // Unified analyzer that combines visual and structural analysis
     // Preserves complete snapshot.js logic while adding visual coordinate information
@@ -406,6 +406,11 @@
         if (tagName === 'header') return 'banner';
         if (tagName === 'footer') return 'contentinfo';
         if (tagName === 'fieldset') return 'group';
+        // Enhanced role mappings for table elements
+        if (tagName === 'table') return 'table';
+        if (tagName === 'tr') return 'row';
+        if (tagName === 'td' || tagName === 'th') return 'cell';
         return 'generic';
     }
@@ -484,6 +489,9 @@
         // Add a heuristic to ignore code-like text that might be in the DOM
         if ((text.match(/[;:{}]/g)?.length || 0) > 2) return '';
         return text;
         }
@@ -578,6 +586,8 @@
             const level = getAriaLevel(element);
             if (level > 0) node.level = level;
             return node;
         }
@@ -725,6 +735,9 @@
         if (isRedundantWrapper) {
             return node.children;
         }
         return [node];
     }
@@ -815,6 +828,23 @@
     // === Visual analysis functions from page_script.js ===
+    // Check if element is within the current viewport
+    function isInViewport(element) {
+        if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;
+        try {
+            const rect = element.getBoundingClientRect();
+            return (
+                rect.top >= 0 &&
+                rect.left >= 0 &&
+                rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
+                rect.right <= (window.innerWidth || document.documentElement.clientWidth)
+            );
+        } catch (e) {
+            return false;
+        }
+    }
     // From page_script.js - check if element is topmost at coordinates
     function isTopmost(element, x, y) {
         let hit = document.elementFromPoint(x, y);
@@ -855,10 +885,21 @@
     // === Unified analysis function ===
-    function collectElementsFromTree(node, elementsMap) {
+    function collectElementsFromTree(node, elementsMap, viewportLimitEnabled = false) {
         if (typeof node === 'string') return;
         if (node.element && node.ref) {
+            // If viewport_limit is enabled, only include elements that are in the viewport
+            if (viewportLimitEnabled && !isInViewport(node.element)) {
+                // Skip this element but still process its children
+                if (node.children) {
+                    for (const child of node.children) {
+                        collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
+                    }
+                }
+                return;
+            }
             // Get visual coordinates for this element
             const coordinates = getElementCoordinates(node.element);
@@ -891,7 +932,7 @@
         // Recursively process children
         if (node.children) {
             for (const child of node.children) {
-                collectElementsFromTree(child, elementsMap);
+                collectElementsFromTree(child, elementsMap, viewportLimitEnabled);
             }
         }
     }
@@ -931,7 +972,7 @@
         [tree] = normalizeTree(tree);
         const elementsMap = {};
-        collectElementsFromTree(tree, elementsMap);
+        collectElementsFromTree(tree, elementsMap, viewport_limit);
         // Verify uniqueness of aria-ref attributes (debugging aid)
         const ariaRefCounts = {};

camel-ai 0.2.74a5__py3-none-any.whl → 0.2.75a2__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.74a5__py3-none-any.whl → 0.2.75a2__py3-none-any.whl