PyPI - camel-ai - Versions diffs - 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl - Mend

camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (21)

camel/services/agent_openapi_server.py ADDED Viewed

@@ -0,0 +1,380 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from typing import Any, Dict, List, Optional, Type, Union
+from fastapi import APIRouter, FastAPI, HTTPException
+from pydantic import BaseModel
+from camel.agents.chat_agent import ChatAgent
+from camel.messages import BaseMessage
+from camel.models import ModelFactory
+from camel.toolkits import FunctionTool
+from camel.types import RoleType
+class InitRequest(BaseModel):
+    r"""Request schema for initializing a ChatAgent via the OpenAPI server.
+    Defines the configuration used to create a new agent, including the model,
+    system message, tool names, and generation parameters.
+    Args:
+        model_type (Optional[str]): The model type to use. Should match a key
+            supported by the model manager, e.g., "gpt-4o-mini".
+            (default: :obj:`"gpt-4o-mini"`)
+        model_platform (Optional[str]): The model platform to use.
+            (default: :obj:`"openai"`)
+        tools_names (Optional[List[str]]): A list of tool names to load from
+            the tool registry. These tools will be available to the agent.
+            (default: :obj:`None`)
+        external_tools (Optional[List[Dict[str, Any]]]): Tool definitions
+            provided directly as dictionaries, bypassing the registry.
+            Currently not supported. (default: :obj:`None`)
+        agent_id (str): The unique identifier for the agent. Must be provided
+            explicitly to support multi-agent routing and control.
+        system_message (Optional[str]): The system prompt for the agent,
+            describing its behavior or role. (default: :obj:`None`)
+        message_window_size (Optional[int]): The number of recent messages to
+            retain in memory for context. (default: :obj:`None`)
+        token_limit (Optional[int]): The token budget for contextual memory.
+            (default: :obj:`None`)
+        output_language (Optional[str]): Preferred output language for the
+            agent's replies. (default: :obj:`None`)
+        max_iteration (Optional[int]): Maximum number of model
+            calling iterations allowed per step. If `None` (default), there's
+            no explicit limit. If `1`, it performs a single model call. If `N
+            > 1`, it allows up to N model calls. (default: :obj:`None`)
+    """
+    model_type: Optional[str] = "gpt-4o-mini"
+    model_platform: Optional[str] = "openai"
+    tools_names: Optional[List[str]] = None
+    external_tools: Optional[List[Dict[str, Any]]] = None
+    agent_id: str  # Required: explicitly set agent_id to
+    # support future multi-agent and permission control
+    system_message: Optional[str] = None
+    message_window_size: Optional[int] = None
+    token_limit: Optional[int] = None
+    output_language: Optional[str] = None
+    max_iteration: Optional[int] = None  # Changed from Optional[bool] = False
+class StepRequest(BaseModel):
+    r"""Request schema for sending a user message to a ChatAgent.
+    Supports plain text input or structured message dictionaries, with an
+    optional response format for controlling output structure.
+    Args:
+        input_message (Union[str, Dict[str, Any]]): The user message to send.
+            Can be a plain string or a message dict with role, content, etc.
+        response_format (Optional[str]): Optional format name that maps to a
+            registered response schema. Not currently in use.
+            (default: :obj:`None`)
+    """
+    input_message: Union[str, Dict[str, Any]]
+    response_format: Optional[str] = None  # reserved, not used yet
+class ChatAgentOpenAPIServer:
+    r"""A FastAPI server wrapper for managing ChatAgents via OpenAPI routes.
+    This server exposes a versioned REST API for interacting with CAMEL
+    agents, supporting initialization, message passing, memory inspection,
+    and optional tool usage. It supports multi-agent use cases by mapping
+    unique agent IDs to active ChatAgent instances.
+    Typical usage includes initializing agents with system prompts and tools,
+    exchanging messages using /step or /astep endpoints, and inspecting agent
+    memory with /history.
+    Supports pluggable tool and response format registries for customizing
+    agent behavior or output schemas.
+    """
+    def __init__(
+        self,
+        tool_registry: Optional[Dict[str, List[FunctionTool]]] = None,
+        response_format_registry: Optional[Dict[str, Type[BaseModel]]] = None,
+    ):
+        r"""Initializes the OpenAPI server for managing ChatAgents.
+        Sets up internal agent storage, tool and response format registries,
+        and prepares versioned API routes.
+        Args:
+            tool_registry (Optional[Dict[str, List[FunctionTool]]]): A mapping
+                from tool names to lists of FunctionTool instances available
+                to agents via the "tools_names" field. If not provided, an
+                empty registry is used. (default: :obj:`None`)
+            response_format_registry (Optional[Dict[str, Type[BaseModel]]]):
+                A mapping from format names to Pydantic output schemas for
+                structured response parsing. Used for controlling the format
+                of step results. (default: :obj:`None`)
+        """
+        # Initialize FastAPI app and agent
+        self.app = FastAPI(title="CAMEL OpenAPI-compatible Server")
+        self.agents: Dict[str, ChatAgent] = {}
+        self.tool_registry = tool_registry or {}
+        self.response_format_registry = response_format_registry or {}
+        self._setup_routes()
+    def _parse_input_message_for_step(
+        self, raw: Union[str, dict]
+    ) -> BaseMessage:
+        r"""Parses raw input into a BaseMessage object.
+        Args:
+            raw (str or dict): User input as plain text or dict.
+        Returns:
+            BaseMessage: Parsed input message.
+        """
+        if isinstance(raw, str):
+            return BaseMessage.make_user_message(role_name="User", content=raw)
+        elif isinstance(raw, dict):
+            if isinstance(raw.get("role_type"), str):
+                raw["role_type"] = RoleType(raw["role_type"].lower())
+            return BaseMessage(**raw)
+        raise HTTPException(
+            status_code=400, detail="Unsupported input format."
+        )
+    def _resolve_response_format_for_step(
+        self, name: Optional[str]
+    ) -> Optional[Type[BaseModel]]:
+        r"""Resolves the response format by name.
+        Args:
+            name (str or None): Optional format name.
+        Returns:
+            Optional[Type[BaseModel]]: Response schema class.
+        """
+        if name is None:
+            return None
+        if name not in self.response_format_registry:
+            raise HTTPException(
+                status_code=400, detail=f"Unknown response_format: {name}"
+            )
+        return self.response_format_registry[name]
+    def _setup_routes(self):
+        r"""Registers OpenAPI endpoints for agent creation and interaction.
+        This includes routes for initializing agents (/init), sending
+        messages (/step and /astep), resetting agent memory (/reset), and
+        retrieving conversation history (/history). All routes are added
+        under the /v1/agents namespace.
+        """
+        router = APIRouter(prefix="/v1/agents")
+        @router.post("/init")
+        def init_agent(request: InitRequest):
+            r"""Initializes a ChatAgent instance with a model,
+            system message, and optional tools.
+            Args:
+                request (InitRequest): The agent config including
+                    model, tools, system message, and agent ID.
+            Returns:
+                dict: A message with the agent ID and status.
+            """
+            agent_id = request.agent_id
+            if agent_id in self.agents:
+                return {
+                    "agent_id": agent_id,
+                    "message": "Agent already exists.",
+                }
+            model_type = request.model_type
+            model_platform = request.model_platform
+            model = ModelFactory.create(
+                model_platform=model_platform,  # type: ignore[arg-type]
+                model_type=model_type,  # type: ignore[arg-type]
+            )
+            # tools lookup
+            tools = []
+            if request.tools_names:
+                for name in request.tools_names:
+                    if name in self.tool_registry:
+                        tools.extend(self.tool_registry[name])
+                    else:
+                        raise HTTPException(
+                            status_code=400,
+                            detail=f"Tool '{name}' " f"not found in registry",
+                        )
+            # system message
+            system_message = request.system_message
+            agent = ChatAgent(
+                model=model,
+                tools=tools,  # type: ignore[arg-type]
+                external_tools=request.external_tools,  # type: ignore[arg-type]
+                system_message=system_message,
+                message_window_size=request.message_window_size,
+                token_limit=request.token_limit,
+                output_language=request.output_language,
+                max_iteration=request.max_iteration,
+                agent_id=agent_id,
+            )
+            self.agents[agent_id] = agent
+            return {"agent_id": agent_id, "message": "Agent initialized."}
+        @router.post("/astep/{agent_id}")
+        async def astep_agent(agent_id: str, request: StepRequest):
+            r"""Runs one async step of agent response.
+            Args:
+                agent_id (str): The ID of the target agent.
+                request (StepRequest): The input message.
+            Returns:
+                dict: The model response in serialized form.
+            """
+            if agent_id not in self.agents:
+                raise HTTPException(status_code=404, detail="Agent not found.")
+            agent = self.agents[agent_id]
+            input_message = self._parse_input_message_for_step(
+                request.input_message
+            )
+            format_cls = self._resolve_response_format_for_step(
+                request.response_format
+            )
+            try:
+                response = await agent.astep(
+                    input_message=input_message, response_format=format_cls
+                )
+                return response.model_dump()
+            except Exception as e:
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Unexpected error during async step: {e!s}",
+                )
+        @router.get("/list_agent_ids")
+        def list_agent_ids():
+            r"""Returns a list of all active agent IDs.
+            Returns:
+                dict: A dictionary containing all registered agent IDs.
+            """
+            return {"agent_ids": list(self.agents.keys())}
+        @router.post("/delete/{agent_id}")
+        def delete_agent(agent_id: str):
+            r"""Deletes an agent from the server.
+            Args:
+                agent_id (str): The ID of the agent to delete.
+            Returns:
+                dict: A confirmation message upon successful deletion.
+            """
+            if agent_id not in self.agents:
+                raise HTTPException(status_code=404, detail="Agent not found.")
+            del self.agents[agent_id]
+            return {"message": f"Agent {agent_id} deleted."}
+        @router.post("/step/{agent_id}")
+        def step_agent(agent_id: str, request: StepRequest):
+            r"""Runs one step of synchronous agent response.
+            Args:
+                agent_id (str): The ID of the target agent.
+                request (StepRequest): The input message.
+            Returns:
+                dict: The model response in serialized form.
+            """
+            if agent_id not in self.agents:
+                raise HTTPException(status_code=404, detail="Agent not found.")
+            agent = self.agents[agent_id]
+            input_message = self._parse_input_message_for_step(
+                request.input_message
+            )
+            format_cls = self._resolve_response_format_for_step(
+                request.response_format
+            )
+            try:
+                response = agent.step(
+                    input_message=input_message, response_format=format_cls
+                )
+                return response.model_dump()
+            except Exception as e:
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Unexpected error during step: {e!s}",
+                )
+        @router.post("/reset/{agent_id}")
+        def reset_agent(agent_id: str):
+            r"""Clears memory for a specific agent.
+            Args:
+                agent_id (str): The ID of the agent to reset.
+            Returns:
+                dict: A message confirming reset success.
+            """
+            if agent_id not in self.agents:
+                raise HTTPException(status_code=404, detail="Agent not found.")
+            self.agents[agent_id].reset()
+            return {"message": f"Agent {agent_id} reset."}
+        @router.get("/history/{agent_id}")
+        def get_agent_chat_history(agent_id: str):
+            r"""Returns the chat history of an agent.
+            Args:
+                agent_id (str): The ID of the agent to query.
+            Returns:
+                list: The list of conversation messages.
+            """
+            if agent_id not in self.agents:
+                raise HTTPException(
+                    status_code=404, detail=f"Agent {agent_id} not found."
+                )
+            return self.agents[agent_id].chat_history
+        # Register all routes to the main FastAPI app
+        self.app.include_router(router)
+    def get_app(self) -> FastAPI:
+        r"""Returns the FastAPI app instance.
+        Returns:
+            FastAPI: The wrapped application object.
+        """
+        return self.app

camel/toolkits/__init__.py CHANGED Viewed

@@ -77,7 +77,7 @@ from .aci_toolkit import ACIToolkit
 from .playwright_mcp_toolkit import PlaywrightMCPToolkit
 from .wolfram_alpha_toolkit import WolframAlphaToolkit
 from .task_planning_toolkit import TaskPlanningToolkit
-from .non_visual_browser_toolkit import BrowserNonVisualToolkit
+from .hybrid_browser_toolkit import HybridBrowserToolkit
 from .edgeone_pages_mcp_toolkit import EdgeOnePagesMCPToolkit
 from .google_drive_mcp_toolkit import GoogleDriveMCPToolkit
 from .craw4ai_toolkit import Crawl4AIToolkit
@@ -146,7 +146,7 @@ __all__ = [
     'WolframAlphaToolkit',
     'BohriumToolkit',
     'TaskPlanningToolkit',
-    'BrowserNonVisualToolkit',
+    'HybridBrowserToolkit',
     'EdgeOnePagesMCPToolkit',
     'GoogleDriveMCPToolkit',
     'Crawl4AIToolkit',

camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py RENAMED Viewed

@@ -11,8 +11,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from .browser_non_visual_toolkit import BrowserNonVisualToolkit
+from .hybrid_browser_toolkit import HybridBrowserToolkit
 __all__ = [
-    "BrowserNonVisualToolkit",
+    "HybridBrowserToolkit",
 ]

camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py RENAMED Viewed

@@ -24,6 +24,7 @@ class ActionExecutor:
     # Configuration constants
     DEFAULT_TIMEOUT = 5000  # 5 seconds
     SHORT_TIMEOUT = 2000  # 2 seconds
+    MAX_SCROLL_AMOUNT = 5000  # Maximum scroll distance in pixels
     def __init__(self, page: "Page"):
         self.page = page
@@ -32,6 +33,7 @@ class ActionExecutor:
     # Public helpers
     # ------------------------------------------------------------------
     async def execute(self, action: Dict[str, Any]) -> str:
+        r"""Execute an action and return the result description."""
         if not action:
             return "No action to execute"
@@ -64,32 +66,46 @@ class ActionExecutor:
     # Internal handlers
     # ------------------------------------------------------------------
     async def _click(self, action: Dict[str, Any]) -> str:
+        r"""Handle click actions with multiple fallback strategies."""
         ref = action.get("ref")
         text = action.get("text")
         selector = action.get("selector")
         if not (ref or text or selector):
             return "Error: click requires ref/text/selector"
+        # Build strategies in priority order: ref > selector > text
         strategies = []
+        if ref:
+            strategies.append(f"[aria-ref='{ref}']")
         if selector:
             strategies.append(selector)
         if text:
             strategies.append(f'text="{text}"')
-        if ref:
-            strategies.append(f"[aria-ref='{ref}']")
+        # Strategy 1: Try Playwright force click for each selector
         for sel in strategies:
             try:
                 if await self.page.locator(sel).count() > 0:
                     await self.page.click(
-                        sel, timeout=self.SHORT_TIMEOUT, force=True
+                        sel, timeout=self.DEFAULT_TIMEOUT, force=True
                     )
-                    return f"Clicked element via {sel}"
+                    return f"Clicked element via force: {sel}"
             except Exception:
-                pass
-        return "Error: Could not click element"
+                continue
+        # Strategy 2: Try JavaScript click as fallback
+        for sel in strategies:
+            try:
+                await self.page.locator(sel).first.evaluate("el => el.click()")
+                await asyncio.sleep(0.1)  # Brief wait for effects
+                return f"Clicked element via JS: {sel}"
+            except Exception:
+                continue
+        return "Error: All click strategies failed"
     async def _type(self, action: Dict[str, Any]) -> str:
+        r"""Handle typing text into input fields."""
         ref = action.get("ref")
         selector = action.get("selector")
         text = action.get("text", "")
@@ -103,6 +119,7 @@ class ActionExecutor:
             return f"Type failed: {exc}"
     async def _select(self, action: Dict[str, Any]) -> str:
+        r"""Handle selecting options from dropdowns."""
         ref = action.get("ref")
         selector = action.get("selector")
         value = action.get("value", "")
@@ -118,8 +135,9 @@ class ActionExecutor:
             return f"Select failed: {exc}"
     async def _wait(self, action: Dict[str, Any]) -> str:
+        r"""Handle wait actions."""
         if "timeout" in action:
-            ms = action["timeout"]
+            ms = int(action["timeout"])
             await asyncio.sleep(ms / 1000)
             return f"Waited {ms}ms"
         if "selector" in action:
@@ -131,6 +149,7 @@ class ActionExecutor:
         return "Error: wait requires timeout/selector"
     async def _extract(self, action: Dict[str, Any]) -> str:
+        r"""Handle text extraction from elements."""
         ref = action.get("ref")
         if not ref:
             return "Error: extract requires ref"
@@ -140,6 +159,7 @@ class ActionExecutor:
         return f"Extracted: {txt[:100] if txt else 'None'}"
     async def _scroll(self, action: Dict[str, Any]) -> str:
+        r"""Handle page scrolling with safe parameter validation."""
         direction = action.get("direction", "down")
         amount = action.get("amount", 300)
@@ -151,18 +171,22 @@ class ActionExecutor:
             # Safely convert amount to integer and clamp to reasonable range
             amount_int = int(amount)
             amount_int = max(
-                -5000, min(5000, amount_int)
-            )  # Clamp between -5000 and 5000
+                -self.MAX_SCROLL_AMOUNT,
+                min(self.MAX_SCROLL_AMOUNT, amount_int),
+            )  # Clamp to MAX_SCROLL_AMOUNT range
         except (ValueError, TypeError):
             return "Error: amount must be a valid number"
         # Use safe evaluation with bound parameters
         scroll_offset = amount_int if direction == "down" else -amount_int
-        await self.page.evaluate(f"window.scrollBy(0, {scroll_offset})")
+        await self.page.evaluate(
+            "offset => window.scrollBy(0, offset)", scroll_offset
+        )
         await asyncio.sleep(0.5)
         return f"Scrolled {direction} by {abs(amount_int)}px"
     async def _enter(self, action: Dict[str, Any]) -> str:
+        r"""Handle Enter key press actions."""
         ref = action.get("ref")
         selector = action.get("selector")
         if ref:
@@ -175,16 +199,28 @@ class ActionExecutor:
     # utilities
     async def _wait_dom_stable(self) -> None:
+        r"""Wait for DOM to become stable before executing actions."""
         try:
+            # Wait for basic DOM content loading
             await self.page.wait_for_load_state(
                 'domcontentloaded', timeout=self.SHORT_TIMEOUT
             )
+            # Try to wait for network idle briefly
+            try:
+                await self.page.wait_for_load_state(
+                    'networkidle', timeout=self.SHORT_TIMEOUT
+                )
+            except Exception:
+                pass  # Network idle is optional
         except Exception:
-            pass
+            pass  # Don't fail if wait times out
     # static helpers
     @staticmethod
     def should_update_snapshot(action: Dict[str, Any]) -> bool:
+        r"""Determine if an action requires a snapshot update."""
         change_types = {
             "click",
             "type",

camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py RENAMED Viewed

@@ -12,24 +12,24 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import json
-import logging
 import re
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from camel.logger import get_logger
 from camel.models import BaseModelBackend, ModelFactory
 from camel.types import ModelPlatformType, ModelType
 from .actions import ActionExecutor
-from .nv_browser_session import NVBrowserSession
+from .browser_session import NVBrowserSession
 if TYPE_CHECKING:
     from camel.agents import ChatAgent
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 class PlaywrightLLMAgent:
-    """High-level orchestration: snapshot ↔ LLM ↔ action executor."""
+    r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
     # System prompt as class constant to avoid recreation
     SYSTEM_PROMPT = """
@@ -90,8 +90,8 @@ what was accomplished
         self.action_history: List[Dict[str, Any]] = []
         if model_backend is None:
             model_backend = ModelFactory.create(
-                model_platform=ModelPlatformType.OPENAI,
-                model_type=ModelType.GPT_4O_MINI,
+                model_platform=ModelPlatformType.DEFAULT,
+                model_type=ModelType.DEFAULT,
                 model_config_dict={"temperature": 0, "top_p": 1},
             )
         self.model_backend = model_backend
@@ -99,16 +99,19 @@ what was accomplished
         self._chat_agent: Optional[ChatAgent] = None
     async def navigate(self, url: str) -> str:
+        r"""Navigate to a URL and return the snapshot."""
         try:
             # NVBrowserSession handles waits internally
             logger.debug("Navigated to URL: %s", url)
             await self._session.visit(url)
             return await self._session.get_snapshot(force_refresh=True)
         except Exception as exc:
-            return f"Error: could not navigate - {exc}"
+            error_msg = f"Error: could not navigate to {url} - {exc}"
+            logger.error(error_msg)
+            return error_msg
     def _get_chat_agent(self) -> "ChatAgent":
-        """Get or create the ChatAgent instance."""
+        r"""Get or create the ChatAgent instance."""
         from camel.agents import ChatAgent
         if self._chat_agent is None:
@@ -165,12 +168,16 @@ what was accomplished
         logger.warning(
             "Could not parse JSON from LLM response: %s", content[:200]
         )
+        return self._get_fallback_response("Parsing error")
+    def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
+        r"""Generate a fallback response structure."""
         return {
-            "plan": ["Could not parse response"],
+            "plan": [f"Could not parse response: {error_msg}"],
             "action": {
                 "type": "finish",
                 "ref": None,
-                "summary": "Parsing error",
+                "summary": f"Parsing error: {error_msg}",
             },
         }
@@ -181,7 +188,7 @@ what was accomplished
         is_initial: bool,
         history: Optional[List[Dict[str, Any]]] = None,
     ) -> Dict[str, Any]:
-        """Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
+        r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
         # Build user message
         if is_initial:
             user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
@@ -208,6 +215,7 @@ what was accomplished
         return self._safe_parse_json(content)
     async def process_command(self, prompt: str, max_steps: int = 15):
+        r"""Process a command using LLM-guided browser automation."""
         # initial full snapshot
         full_snapshot = await self._session.get_snapshot()
         assert self._session.snapshot is not None
@@ -270,9 +278,11 @@ what was accomplished
         logger.info("Process completed with %d steps", steps)
     async def _run_action(self, action: Dict[str, Any]) -> str:
+        r"""Execute a single action and return the result."""
         if action.get("type") == "navigate":
             return await self.navigate(action.get("url", ""))
         return await self._session.exec_action(action)
     async def close(self):
+        r"""Clean up browser session and resources."""
         await self._session.close()

camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl