camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -0,0 +1,380 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+
16
+ from typing import Any, Dict, List, Optional, Type, Union
17
+
18
+ from fastapi import APIRouter, FastAPI, HTTPException
19
+ from pydantic import BaseModel
20
+
21
+ from camel.agents.chat_agent import ChatAgent
22
+ from camel.messages import BaseMessage
23
+ from camel.models import ModelFactory
24
+ from camel.toolkits import FunctionTool
25
+ from camel.types import RoleType
26
+
27
+
28
class InitRequest(BaseModel):
    r"""Request schema for initializing a ChatAgent via the OpenAPI server.

    Defines the configuration used to create a new agent, including the model,
    system message, tool names, and generation parameters.

    Args:
        model_type (Optional[str]): The model type to use. Should match a key
            supported by the model manager, e.g., "gpt-4o-mini".
            (default: :obj:`"gpt-4o-mini"`)
        model_platform (Optional[str]): The model platform to use.
            (default: :obj:`"openai"`)
        tools_names (Optional[List[str]]): A list of tool names to load from
            the tool registry. These tools will be available to the agent.
            (default: :obj:`None`)
        external_tools (Optional[List[Dict[str, Any]]]): Tool definitions
            provided directly as dictionaries, bypassing the registry.
            Currently not supported. (default: :obj:`None`)
        agent_id (str): The unique identifier for the agent. Must be provided
            explicitly to support multi-agent routing and control.
        system_message (Optional[str]): The system prompt for the agent,
            describing its behavior or role. (default: :obj:`None`)
        message_window_size (Optional[int]): The number of recent messages to
            retain in memory for context. (default: :obj:`None`)
        token_limit (Optional[int]): The token budget for contextual memory.
            (default: :obj:`None`)
        output_language (Optional[str]): Preferred output language for the
            agent's replies. (default: :obj:`None`)
        max_iteration (Optional[int]): Maximum number of model
            calling iterations allowed per step. If `None` (default), there's
            no explicit limit. If `1`, it performs a single model call. If `N
            > 1`, it allows up to N model calls. (default: :obj:`None`)
    """

    # ``model_type`` / ``model_platform`` start with "model_", which pydantic
    # v2 treats as a protected namespace on releases where that is the
    # default, emitting a warning when the class is created. Clearing the
    # protected namespaces keeps the public field names unchanged.
    model_config = {"protected_namespaces": ()}

    model_type: Optional[str] = "gpt-4o-mini"
    model_platform: Optional[str] = "openai"

    tools_names: Optional[List[str]] = None
    external_tools: Optional[List[Dict[str, Any]]] = None

    # Required: explicitly set agent_id to support future multi-agent and
    # permission control.
    agent_id: str

    system_message: Optional[str] = None
    message_window_size: Optional[int] = None
    token_limit: Optional[int] = None
    output_language: Optional[str] = None
    max_iteration: Optional[int] = None
76
+
77
+
78
class StepRequest(BaseModel):
    r"""Payload accepted by the step endpoints of the agent server.

    Carries one user turn for an already-initialized ChatAgent, plus an
    optional name selecting a structured output schema.

    Args:
        input_message (Union[str, Dict[str, Any]]): The user turn. Either a
            plain string, or a dictionary describing a full message (role,
            content, and so on).
        response_format (Optional[str]): Name of a response schema known to
            the server's response format registry. Reserved; marked in the
            source as not used yet. (default: :obj:`None`)
    """

    # The user turn: free text or a structured message dictionary.
    input_message: Union[str, Dict[str, Any]]
    # Reserved, not used yet.
    response_format: Optional[str] = None
94
+
95
+
96
class ChatAgentOpenAPIServer:
    r"""A FastAPI server wrapper for managing ChatAgents via OpenAPI routes.

    This server exposes a versioned REST API for interacting with CAMEL
    agents, supporting initialization, message passing, memory inspection,
    and optional tool usage. It supports multi-agent use cases by mapping
    unique agent IDs to active ChatAgent instances.

    Typical usage includes initializing agents with system prompts and tools,
    exchanging messages using /step or /astep endpoints, and inspecting agent
    memory with /history.

    Supports pluggable tool and response format registries for customizing
    agent behavior or output schemas.
    """

    def __init__(
        self,
        tool_registry: Optional[Dict[str, List[FunctionTool]]] = None,
        response_format_registry: Optional[Dict[str, Type[BaseModel]]] = None,
    ):
        r"""Initializes the OpenAPI server for managing ChatAgents.

        Sets up internal agent storage, tool and response format registries,
        and prepares versioned API routes.

        Args:
            tool_registry (Optional[Dict[str, List[FunctionTool]]]): A mapping
                from tool names to lists of FunctionTool instances available
                to agents via the "tools_names" field. If not provided, an
                empty registry is used. (default: :obj:`None`)
            response_format_registry (Optional[Dict[str, Type[BaseModel]]]):
                A mapping from format names to Pydantic output schemas for
                structured response parsing. Used for controlling the format
                of step results. (default: :obj:`None`)
        """

        # Initialize FastAPI app and agent storage
        self.app = FastAPI(title="CAMEL OpenAPI-compatible Server")
        self.agents: Dict[str, ChatAgent] = {}
        self.tool_registry = tool_registry or {}
        self.response_format_registry = response_format_registry or {}
        self._setup_routes()

    def _parse_input_message_for_step(
        self, raw: Union[str, dict]
    ) -> BaseMessage:
        r"""Parses raw input into a BaseMessage object.

        Args:
            raw (str or dict): User input as plain text or dict. A dict is
                expanded into the BaseMessage constructor; a string
                "role_type" entry is lower-cased and converted to RoleType.

        Returns:
            BaseMessage: Parsed input message.

        Raises:
            HTTPException: With status 400 if the input is neither a string
                nor a dict, or if the dict cannot be turned into a
                BaseMessage (unknown role type, bad or missing fields).
        """
        if isinstance(raw, str):
            return BaseMessage.make_user_message(role_name="User", content=raw)

        if isinstance(raw, dict):
            # Work on a shallow copy so the request payload is left untouched.
            fields = dict(raw)
            try:
                role_type = fields.get("role_type")
                if isinstance(role_type, str):
                    fields["role_type"] = RoleType(role_type.lower())
                return BaseMessage(**fields)
            except (ValueError, TypeError) as exc:
                # A malformed message is a client error, not a server fault.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid input message: {exc!s}",
                ) from exc

        raise HTTPException(
            status_code=400, detail="Unsupported input format."
        )

    def _resolve_response_format_for_step(
        self, name: Optional[str]
    ) -> Optional[Type[BaseModel]]:
        r"""Resolves the response format by name.

        Args:
            name (str or None): Optional format name.

        Returns:
            Optional[Type[BaseModel]]: Response schema class, or None when no
                name was given.

        Raises:
            HTTPException: With status 400 if the name is not registered.
        """
        if name is None:
            return None
        if name not in self.response_format_registry:
            raise HTTPException(
                status_code=400, detail=f"Unknown response_format: {name}"
            )
        return self.response_format_registry[name]

    def _setup_routes(self):
        r"""Registers OpenAPI endpoints for agent creation and interaction.

        This includes routes for initializing agents (/init), sending
        messages (/step and /astep), listing and deleting agents
        (/list_agent_ids and /delete), resetting agent memory (/reset), and
        retrieving conversation history (/history). All routes are added
        under the /v1/agents namespace.
        """

        router = APIRouter(prefix="/v1/agents")

        @router.post("/init")
        def init_agent(request: InitRequest):
            r"""Initializes a ChatAgent instance with a model,
            system message, and optional tools.

            Args:
                request (InitRequest): The agent config including
                    model, tools, system message, and agent ID.

            Returns:
                dict: A message with the agent ID and status.
            """

            agent_id = request.agent_id
            # Re-initializing an existing ID is a no-op, not an error.
            if agent_id in self.agents:
                return {
                    "agent_id": agent_id,
                    "message": "Agent already exists.",
                }

            model_type = request.model_type
            model_platform = request.model_platform

            model = ModelFactory.create(
                model_platform=model_platform,  # type: ignore[arg-type]
                model_type=model_type,  # type: ignore[arg-type]
            )

            # tools lookup
            tools = []
            if request.tools_names:
                for name in request.tools_names:
                    if name in self.tool_registry:
                        tools.extend(self.tool_registry[name])
                    else:
                        raise HTTPException(
                            status_code=400,
                            detail=f"Tool '{name}' " f"not found in registry",
                        )

            # system message
            system_message = request.system_message

            agent = ChatAgent(
                model=model,
                tools=tools,  # type: ignore[arg-type]
                external_tools=request.external_tools,  # type: ignore[arg-type]
                system_message=system_message,
                message_window_size=request.message_window_size,
                token_limit=request.token_limit,
                output_language=request.output_language,
                max_iteration=request.max_iteration,
                agent_id=agent_id,
            )

            self.agents[agent_id] = agent
            return {"agent_id": agent_id, "message": "Agent initialized."}

        @router.post("/astep/{agent_id}")
        async def astep_agent(agent_id: str, request: StepRequest):
            r"""Runs one async step of agent response.

            Args:
                agent_id (str): The ID of the target agent.
                request (StepRequest): The input message.

            Returns:
                dict: The model response in serialized form.
            """

            if agent_id not in self.agents:
                raise HTTPException(status_code=404, detail="Agent not found.")

            agent = self.agents[agent_id]
            # Input validation happens outside the try block so that its
            # 400 responses are not rewritten into 500s below.
            input_message = self._parse_input_message_for_step(
                request.input_message
            )
            format_cls = self._resolve_response_format_for_step(
                request.response_format
            )

            try:
                response = await agent.astep(
                    input_message=input_message, response_format=format_cls
                )
                return response.model_dump()
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Unexpected error during async step: {e!s}",
                ) from e

        @router.get("/list_agent_ids")
        def list_agent_ids():
            r"""Returns a list of all active agent IDs.

            Returns:
                dict: A dictionary containing all registered agent IDs.
            """
            return {"agent_ids": list(self.agents.keys())}

        @router.post("/delete/{agent_id}")
        def delete_agent(agent_id: str):
            r"""Deletes an agent from the server.

            Args:
                agent_id (str): The ID of the agent to delete.

            Returns:
                dict: A confirmation message upon successful deletion.
            """
            if agent_id not in self.agents:
                raise HTTPException(status_code=404, detail="Agent not found.")

            del self.agents[agent_id]
            return {"message": f"Agent {agent_id} deleted."}

        @router.post("/step/{agent_id}")
        def step_agent(agent_id: str, request: StepRequest):
            r"""Runs one step of synchronous agent response.

            Args:
                agent_id (str): The ID of the target agent.
                request (StepRequest): The input message.

            Returns:
                dict: The model response in serialized form.
            """
            if agent_id not in self.agents:
                raise HTTPException(status_code=404, detail="Agent not found.")

            agent = self.agents[agent_id]
            # Input validation happens outside the try block so that its
            # 400 responses are not rewritten into 500s below.
            input_message = self._parse_input_message_for_step(
                request.input_message
            )
            format_cls = self._resolve_response_format_for_step(
                request.response_format
            )
            try:
                response = agent.step(
                    input_message=input_message, response_format=format_cls
                )
                return response.model_dump()
            except Exception as e:
                raise HTTPException(
                    status_code=500,
                    detail=f"Unexpected error during step: {e!s}",
                ) from e

        @router.post("/reset/{agent_id}")
        def reset_agent(agent_id: str):
            r"""Clears memory for a specific agent.

            Args:
                agent_id (str): The ID of the agent to reset.

            Returns:
                dict: A message confirming reset success.
            """
            if agent_id not in self.agents:
                raise HTTPException(status_code=404, detail="Agent not found.")
            self.agents[agent_id].reset()
            return {"message": f"Agent {agent_id} reset."}

        @router.get("/history/{agent_id}")
        def get_agent_chat_history(agent_id: str):
            r"""Returns the chat history of an agent.

            Args:
                agent_id (str): The ID of the agent to query.

            Returns:
                list: The list of conversation messages.
            """
            if agent_id not in self.agents:
                raise HTTPException(
                    status_code=404, detail=f"Agent {agent_id} not found."
                )
            return self.agents[agent_id].chat_history

        # Register all routes to the main FastAPI app
        self.app.include_router(router)

    def get_app(self) -> FastAPI:
        r"""Returns the FastAPI app instance.

        Returns:
            FastAPI: The wrapped application object.
        """
        return self.app
@@ -77,7 +77,7 @@ from .aci_toolkit import ACIToolkit
77
77
  from .playwright_mcp_toolkit import PlaywrightMCPToolkit
78
78
  from .wolfram_alpha_toolkit import WolframAlphaToolkit
79
79
  from .task_planning_toolkit import TaskPlanningToolkit
80
- from .non_visual_browser_toolkit import BrowserNonVisualToolkit
80
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
81
81
  from .edgeone_pages_mcp_toolkit import EdgeOnePagesMCPToolkit
82
82
  from .google_drive_mcp_toolkit import GoogleDriveMCPToolkit
83
83
  from .craw4ai_toolkit import Crawl4AIToolkit
@@ -146,7 +146,7 @@ __all__ = [
146
146
  'WolframAlphaToolkit',
147
147
  'BohriumToolkit',
148
148
  'TaskPlanningToolkit',
149
- 'BrowserNonVisualToolkit',
149
+ 'HybridBrowserToolkit',
150
150
  'EdgeOnePagesMCPToolkit',
151
151
  'GoogleDriveMCPToolkit',
152
152
  'Crawl4AIToolkit',
@@ -11,8 +11,8 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- from .browser_non_visual_toolkit import BrowserNonVisualToolkit
14
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
15
15
 
16
16
  __all__ = [
17
- "BrowserNonVisualToolkit",
17
+ "HybridBrowserToolkit",
18
18
  ]
@@ -24,6 +24,7 @@ class ActionExecutor:
24
24
  # Configuration constants
25
25
  DEFAULT_TIMEOUT = 5000 # 5 seconds
26
26
  SHORT_TIMEOUT = 2000 # 2 seconds
27
+ MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
27
28
 
28
29
  def __init__(self, page: "Page"):
29
30
  self.page = page
@@ -32,6 +33,7 @@ class ActionExecutor:
32
33
  # Public helpers
33
34
  # ------------------------------------------------------------------
34
35
  async def execute(self, action: Dict[str, Any]) -> str:
36
+ r"""Execute an action and return the result description."""
35
37
  if not action:
36
38
  return "No action to execute"
37
39
 
@@ -64,32 +66,46 @@ class ActionExecutor:
64
66
  # Internal handlers
65
67
  # ------------------------------------------------------------------
66
68
  async def _click(self, action: Dict[str, Any]) -> str:
69
+ r"""Handle click actions with multiple fallback strategies."""
67
70
  ref = action.get("ref")
68
71
  text = action.get("text")
69
72
  selector = action.get("selector")
70
73
  if not (ref or text or selector):
71
74
  return "Error: click requires ref/text/selector"
72
75
 
76
+ # Build strategies in priority order: ref > selector > text
73
77
  strategies = []
78
+ if ref:
79
+ strategies.append(f"[aria-ref='{ref}']")
74
80
  if selector:
75
81
  strategies.append(selector)
76
82
  if text:
77
83
  strategies.append(f'text="{text}"')
78
- if ref:
79
- strategies.append(f"[aria-ref='{ref}']")
80
84
 
85
+ # Strategy 1: Try Playwright force click for each selector
81
86
  for sel in strategies:
82
87
  try:
83
88
  if await self.page.locator(sel).count() > 0:
84
89
  await self.page.click(
85
- sel, timeout=self.SHORT_TIMEOUT, force=True
90
+ sel, timeout=self.DEFAULT_TIMEOUT, force=True
86
91
  )
87
- return f"Clicked element via {sel}"
92
+ return f"Clicked element via force: {sel}"
88
93
  except Exception:
89
- pass
90
- return "Error: Could not click element"
94
+ continue
95
+
96
+ # Strategy 2: Try JavaScript click as fallback
97
+ for sel in strategies:
98
+ try:
99
+ await self.page.locator(sel).first.evaluate("el => el.click()")
100
+ await asyncio.sleep(0.1) # Brief wait for effects
101
+ return f"Clicked element via JS: {sel}"
102
+ except Exception:
103
+ continue
104
+
105
+ return "Error: All click strategies failed"
91
106
 
92
107
  async def _type(self, action: Dict[str, Any]) -> str:
108
+ r"""Handle typing text into input fields."""
93
109
  ref = action.get("ref")
94
110
  selector = action.get("selector")
95
111
  text = action.get("text", "")
@@ -103,6 +119,7 @@ class ActionExecutor:
103
119
  return f"Type failed: {exc}"
104
120
 
105
121
  async def _select(self, action: Dict[str, Any]) -> str:
122
+ r"""Handle selecting options from dropdowns."""
106
123
  ref = action.get("ref")
107
124
  selector = action.get("selector")
108
125
  value = action.get("value", "")
@@ -118,8 +135,9 @@ class ActionExecutor:
118
135
  return f"Select failed: {exc}"
119
136
 
120
137
  async def _wait(self, action: Dict[str, Any]) -> str:
138
+ r"""Handle wait actions."""
121
139
  if "timeout" in action:
122
- ms = action["timeout"]
140
+ ms = int(action["timeout"])
123
141
  await asyncio.sleep(ms / 1000)
124
142
  return f"Waited {ms}ms"
125
143
  if "selector" in action:
@@ -131,6 +149,7 @@ class ActionExecutor:
131
149
  return "Error: wait requires timeout/selector"
132
150
 
133
151
  async def _extract(self, action: Dict[str, Any]) -> str:
152
+ r"""Handle text extraction from elements."""
134
153
  ref = action.get("ref")
135
154
  if not ref:
136
155
  return "Error: extract requires ref"
@@ -140,6 +159,7 @@ class ActionExecutor:
140
159
  return f"Extracted: {txt[:100] if txt else 'None'}"
141
160
 
142
161
  async def _scroll(self, action: Dict[str, Any]) -> str:
162
+ r"""Handle page scrolling with safe parameter validation."""
143
163
  direction = action.get("direction", "down")
144
164
  amount = action.get("amount", 300)
145
165
 
@@ -151,18 +171,22 @@ class ActionExecutor:
151
171
  # Safely convert amount to integer and clamp to reasonable range
152
172
  amount_int = int(amount)
153
173
  amount_int = max(
154
- -5000, min(5000, amount_int)
155
- ) # Clamp between -5000 and 5000
174
+ -self.MAX_SCROLL_AMOUNT,
175
+ min(self.MAX_SCROLL_AMOUNT, amount_int),
176
+ ) # Clamp to MAX_SCROLL_AMOUNT range
156
177
  except (ValueError, TypeError):
157
178
  return "Error: amount must be a valid number"
158
179
 
159
180
  # Use safe evaluation with bound parameters
160
181
  scroll_offset = amount_int if direction == "down" else -amount_int
161
- await self.page.evaluate(f"window.scrollBy(0, {scroll_offset})")
182
+ await self.page.evaluate(
183
+ "offset => window.scrollBy(0, offset)", scroll_offset
184
+ )
162
185
  await asyncio.sleep(0.5)
163
186
  return f"Scrolled {direction} by {abs(amount_int)}px"
164
187
 
165
188
  async def _enter(self, action: Dict[str, Any]) -> str:
189
+ r"""Handle Enter key press actions."""
166
190
  ref = action.get("ref")
167
191
  selector = action.get("selector")
168
192
  if ref:
@@ -175,16 +199,28 @@ class ActionExecutor:
175
199
 
176
200
  # utilities
177
201
  async def _wait_dom_stable(self) -> None:
202
+ r"""Wait for DOM to become stable before executing actions."""
178
203
  try:
204
+ # Wait for basic DOM content loading
179
205
  await self.page.wait_for_load_state(
180
206
  'domcontentloaded', timeout=self.SHORT_TIMEOUT
181
207
  )
208
+
209
+ # Try to wait for network idle briefly
210
+ try:
211
+ await self.page.wait_for_load_state(
212
+ 'networkidle', timeout=self.SHORT_TIMEOUT
213
+ )
214
+ except Exception:
215
+ pass # Network idle is optional
216
+
182
217
  except Exception:
183
- pass
218
+ pass # Don't fail if wait times out
184
219
 
185
220
  # static helpers
186
221
  @staticmethod
187
222
  def should_update_snapshot(action: Dict[str, Any]) -> bool:
223
+ r"""Determine if an action requires a snapshot update."""
188
224
  change_types = {
189
225
  "click",
190
226
  "type",
@@ -12,24 +12,24 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  import json
15
- import logging
16
15
  import re
17
16
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
18
17
 
18
+ from camel.logger import get_logger
19
19
  from camel.models import BaseModelBackend, ModelFactory
20
20
  from camel.types import ModelPlatformType, ModelType
21
21
 
22
22
  from .actions import ActionExecutor
23
- from .nv_browser_session import NVBrowserSession
23
+ from .browser_session import NVBrowserSession
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  from camel.agents import ChatAgent
27
27
 
28
- logger = logging.getLogger(__name__)
28
+ logger = get_logger(__name__)
29
29
 
30
30
 
31
31
  class PlaywrightLLMAgent:
32
- """High-level orchestration: snapshot ↔ LLM ↔ action executor."""
32
+ r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
33
33
 
34
34
  # System prompt as class constant to avoid recreation
35
35
  SYSTEM_PROMPT = """
@@ -90,8 +90,8 @@ what was accomplished
90
90
  self.action_history: List[Dict[str, Any]] = []
91
91
  if model_backend is None:
92
92
  model_backend = ModelFactory.create(
93
- model_platform=ModelPlatformType.OPENAI,
94
- model_type=ModelType.GPT_4O_MINI,
93
+ model_platform=ModelPlatformType.DEFAULT,
94
+ model_type=ModelType.DEFAULT,
95
95
  model_config_dict={"temperature": 0, "top_p": 1},
96
96
  )
97
97
  self.model_backend = model_backend
@@ -99,16 +99,19 @@ what was accomplished
99
99
  self._chat_agent: Optional[ChatAgent] = None
100
100
 
101
101
  async def navigate(self, url: str) -> str:
102
+ r"""Navigate to a URL and return the snapshot."""
102
103
  try:
103
104
  # NVBrowserSession handles waits internally
104
105
  logger.debug("Navigated to URL: %s", url)
105
106
  await self._session.visit(url)
106
107
  return await self._session.get_snapshot(force_refresh=True)
107
108
  except Exception as exc:
108
- return f"Error: could not navigate - {exc}"
109
+ error_msg = f"Error: could not navigate to {url} - {exc}"
110
+ logger.error(error_msg)
111
+ return error_msg
109
112
 
110
113
  def _get_chat_agent(self) -> "ChatAgent":
111
- """Get or create the ChatAgent instance."""
114
+ r"""Get or create the ChatAgent instance."""
112
115
  from camel.agents import ChatAgent
113
116
 
114
117
  if self._chat_agent is None:
@@ -165,12 +168,16 @@ what was accomplished
165
168
  logger.warning(
166
169
  "Could not parse JSON from LLM response: %s", content[:200]
167
170
  )
171
+ return self._get_fallback_response("Parsing error")
172
+
173
+ def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
174
+ r"""Generate a fallback response structure."""
168
175
  return {
169
- "plan": ["Could not parse response"],
176
+ "plan": [f"Could not parse response: {error_msg}"],
170
177
  "action": {
171
178
  "type": "finish",
172
179
  "ref": None,
173
- "summary": "Parsing error",
180
+ "summary": f"Parsing error: {error_msg}",
174
181
  },
175
182
  }
176
183
 
@@ -181,7 +188,7 @@ what was accomplished
181
188
  is_initial: bool,
182
189
  history: Optional[List[Dict[str, Any]]] = None,
183
190
  ) -> Dict[str, Any]:
184
- """Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
191
+ r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
185
192
  # Build user message
186
193
  if is_initial:
187
194
  user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
@@ -208,6 +215,7 @@ what was accomplished
208
215
  return self._safe_parse_json(content)
209
216
 
210
217
  async def process_command(self, prompt: str, max_steps: int = 15):
218
+ r"""Process a command using LLM-guided browser automation."""
211
219
  # initial full snapshot
212
220
  full_snapshot = await self._session.get_snapshot()
213
221
  assert self._session.snapshot is not None
@@ -270,9 +278,11 @@ what was accomplished
270
278
  logger.info("Process completed with %d steps", steps)
271
279
 
272
280
  async def _run_action(self, action: Dict[str, Any]) -> str:
281
+ r"""Execute a single action and return the result."""
273
282
  if action.get("type") == "navigate":
274
283
  return await self.navigate(action.get("url", ""))
275
284
  return await self._session.exec_action(action)
276
285
 
277
286
  async def close(self):
287
+ r"""Clean up browser session and resources."""
278
288
  await self._session.close()