camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/chat_agent.py +297 -16
- camel/messages/base.py +2 -6
- camel/services/agent_openapi_server.py +380 -0
- camel/toolkits/__init__.py +2 -2
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
- camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
- camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +24 -24
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +16 -2
- {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +20 -18
- camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
- {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from typing import Any, Dict, List, Optional, Type, Union
|
|
17
|
+
|
|
18
|
+
from fastapi import APIRouter, FastAPI, HTTPException
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
|
|
21
|
+
from camel.agents.chat_agent import ChatAgent
|
|
22
|
+
from camel.messages import BaseMessage
|
|
23
|
+
from camel.models import ModelFactory
|
|
24
|
+
from camel.toolkits import FunctionTool
|
|
25
|
+
from camel.types import RoleType
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class InitRequest(BaseModel):
    r"""Payload used to create a new ChatAgent through the OpenAPI server.

    Carries the full agent configuration: which model backend to build, the
    system prompt, the names of registered tools to attach, and the memory
    and generation limits handed to the agent.

    Args:
        model_type (Optional[str]): Name of the model to create, e.g.
            "gpt-4o-mini". (default: :obj:`"gpt-4o-mini"`)
        model_platform (Optional[str]): Name of the platform that serves
            the model. (default: :obj:`"openai"`)
        tools_names (Optional[List[str]]): Names of tool groups to look up
            in the server's tool registry and attach to the agent.
            (default: :obj:`None`)
        external_tools (Optional[List[Dict[str, Any]]]): Tool definitions
            given directly as dictionaries instead of registry names. The
            value is forwarded unchanged to the agent; it is marked as not
            currently supported. (default: :obj:`None`)
        agent_id (str): Unique identifier of the agent. Required, so that
            several agents can be addressed and controlled independently.
        system_message (Optional[str]): System prompt describing the
            agent's role or behavior. (default: :obj:`None`)
        message_window_size (Optional[int]): How many recent messages are
            kept in memory as context. (default: :obj:`None`)
        token_limit (Optional[int]): Token budget for contextual memory.
            (default: :obj:`None`)
        output_language (Optional[str]): Language the agent should answer
            in. (default: :obj:`None`)
        max_iteration (Optional[int]): Upper bound on model calls per
            step. `None` means no explicit limit, `1` means a single model
            call, and `N > 1` allows up to N model calls.
            (default: :obj:`None`)
    """

    # Model selection.
    model_type: Optional[str] = "gpt-4o-mini"
    model_platform: Optional[str] = "openai"

    # Tools: registry lookups by name, or raw definitions.
    tools_names: Optional[List[str]] = None
    external_tools: Optional[List[Dict[str, Any]]] = None

    # Required on purpose: an explicit ID is what makes multi-agent routing
    # (and, later, permission control) possible.
    agent_id: str

    # Prompting, memory, and generation limits.
    system_message: Optional[str] = None
    message_window_size: Optional[int] = None
    token_limit: Optional[int] = None
    output_language: Optional[str] = None
    max_iteration: Optional[int] = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class StepRequest(BaseModel):
    r"""Payload used to send one user message to an existing ChatAgent.

    The message may be plain text or a structured dictionary. An optional
    response format name can be supplied to select an output schema.

    Args:
        input_message (Union[str, Dict[str, Any]]): The message to deliver.
            Either a plain string, or a dict of message fields (role,
            content, etc.).
        response_format (Optional[str]): Name of a response schema. When
            given, the step routes look it up in the server's response
            format registry and reject unknown names.
            (default: :obj:`None`)
    """

    # Plain text, or a dict of message fields.
    input_message: Union[str, Dict[str, Any]]
    # Name of a registered response schema, if any.
    response_format: Optional[str] = None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class ChatAgentOpenAPIServer:
    r"""A FastAPI server wrapper for managing ChatAgents via OpenAPI routes.

    This server exposes a versioned REST API (under ``/v1/agents``) for
    interacting with CAMEL agents: initialization, message passing, memory
    reset and inspection, and optional tool usage. Multiple agents are
    supported by mapping unique agent IDs to ChatAgent instances held in
    memory by this object.

    Typical usage is to create an agent with ``/init``, exchange messages
    through ``/step`` or ``/astep``, and inspect the conversation with
    ``/history``.

    Tool and response format registries can be supplied at construction
    time to control which tools and output schemas requests may refer to
    by name.
    """

    def __init__(
        self,
        tool_registry: Optional[Dict[str, List[FunctionTool]]] = None,
        response_format_registry: Optional[Dict[str, Type[BaseModel]]] = None,
    ):
        r"""Initializes the OpenAPI server for managing ChatAgents.

        Creates the FastAPI app, an empty agent store, stores the two
        registries, and registers the API routes.

        Args:
            tool_registry (Optional[Dict[str, List[FunctionTool]]]): A mapping
                from tool names to lists of FunctionTool instances available
                to agents via the "tools_names" field. If not provided, an
                empty registry is used. (default: :obj:`None`)
            response_format_registry (Optional[Dict[str, Type[BaseModel]]]):
                A mapping from format names to Pydantic output schemas,
                selectable per step via the "response_format" field. If not
                provided, an empty registry is used. (default: :obj:`None`)
        """
        self.app = FastAPI(title="CAMEL OpenAPI-compatible Server")
        # Live agents, keyed by the agent_id supplied at /init.
        self.agents: Dict[str, ChatAgent] = {}
        self.tool_registry = tool_registry or {}
        self.response_format_registry = response_format_registry or {}
        self._setup_routes()

    def _get_agent_or_404(
        self, agent_id: str, detail: str = "Agent not found."
    ) -> ChatAgent:
        r"""Looks up a registered agent or fails with HTTP 404.

        Args:
            agent_id (str): The ID of the agent to fetch.
            detail (str): Error detail used when the agent is unknown.
                (default: :obj:`"Agent not found."`)

        Returns:
            ChatAgent: The agent stored under the given ID.

        Raises:
            HTTPException: With status 404 if no such agent is registered.
        """
        try:
            return self.agents[agent_id]
        except KeyError:
            raise HTTPException(status_code=404, detail=detail) from None

    def _parse_input_message_for_step(
        self, raw: Union[str, dict]
    ) -> BaseMessage:
        r"""Parses raw input into a BaseMessage object.

        A string becomes a user message with role name "User". A dict is
        passed as keyword arguments to BaseMessage, after converting a
        string "role_type" entry (case-insensitively) to a RoleType.

        Args:
            raw (str or dict): User input as plain text or dict.

        Returns:
            BaseMessage: Parsed input message.

        Raises:
            HTTPException: With status 400 if the input is neither a string
                nor a dict, or if the dict cannot be turned into a message.
        """
        if isinstance(raw, str):
            return BaseMessage.make_user_message(role_name="User", content=raw)

        if isinstance(raw, dict):
            # Shallow copy: the request payload must not be modified as a
            # side effect of parsing it.
            fields = dict(raw)
            try:
                role_type = fields.get("role_type")
                if isinstance(role_type, str):
                    fields["role_type"] = RoleType(role_type.lower())
                return BaseMessage(**fields)
            except (TypeError, ValueError) as exc:
                # An unknown role_type value or missing/unexpected message
                # fields are client errors, not server failures.
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid input message: {exc!s}",
                ) from exc

        raise HTTPException(
            status_code=400, detail="Unsupported input format."
        )

    def _resolve_response_format_for_step(
        self, name: Optional[str]
    ) -> Optional[Type[BaseModel]]:
        r"""Resolves the response format by name.

        Args:
            name (str or None): Optional format name.

        Returns:
            Optional[Type[BaseModel]]: The registered schema class, or
                :obj:`None` when no name was given.

        Raises:
            HTTPException: With status 400 if the name is not registered.
        """
        if name is None:
            return None
        if name not in self.response_format_registry:
            raise HTTPException(
                status_code=400, detail=f"Unknown response_format: {name}"
            )
        return self.response_format_registry[name]

    def _setup_routes(self):
        r"""Registers OpenAPI endpoints for agent creation and interaction.

        This includes routes for initializing agents (/init), sending
        messages (/step and /astep), listing and deleting agents
        (/list_agent_ids and /delete), resetting agent memory (/reset), and
        retrieving conversation history (/history). All routes are added
        under the /v1/agents namespace.
        """
        router = APIRouter(prefix="/v1/agents")

        @router.post("/init")
        def init_agent(request: InitRequest):
            r"""Initializes a ChatAgent instance with a model,
            system message, and optional tools.

            Args:
                request (InitRequest): The agent config including
                    model, tools, system message, and agent ID.

            Returns:
                dict: A message with the agent ID and status.
            """
            agent_id = request.agent_id
            if agent_id in self.agents:
                # An existing agent is left untouched and reported with a
                # normal response rather than an error.
                return {
                    "agent_id": agent_id,
                    "message": "Agent already exists.",
                }

            model_type = request.model_type
            model_platform = request.model_platform

            model = ModelFactory.create(
                model_platform=model_platform,  # type: ignore[arg-type]
                model_type=model_type,  # type: ignore[arg-type]
            )

            # Expand every requested registry name into its tools; an
            # unknown name rejects the whole request.
            tools: List[FunctionTool] = []
            for name in request.tools_names or []:
                if name not in self.tool_registry:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Tool '{name}' not found in registry",
                    )
                tools.extend(self.tool_registry[name])

            agent = ChatAgent(
                model=model,
                tools=tools,  # type: ignore[arg-type]
                external_tools=request.external_tools,  # type: ignore[arg-type]
                system_message=request.system_message,
                message_window_size=request.message_window_size,
                token_limit=request.token_limit,
                output_language=request.output_language,
                max_iteration=request.max_iteration,
                agent_id=agent_id,
            )

            self.agents[agent_id] = agent
            return {"agent_id": agent_id, "message": "Agent initialized."}

        @router.post("/astep/{agent_id}")
        async def astep_agent(agent_id: str, request: StepRequest):
            r"""Runs one async step of agent response.

            Args:
                agent_id (str): The ID of the target agent.
                request (StepRequest): The input message.

            Returns:
                dict: The model response in serialized form.
            """
            agent = self._get_agent_or_404(agent_id)
            input_message = self._parse_input_message_for_step(
                request.input_message
            )
            format_cls = self._resolve_response_format_for_step(
                request.response_format
            )

            try:
                response = await agent.astep(
                    input_message=input_message, response_format=format_cls
                )
                return response.model_dump()
            except Exception as e:
                # Route boundary: report any agent failure as a 500 while
                # keeping the original exception as the cause.
                raise HTTPException(
                    status_code=500,
                    detail=f"Unexpected error during async step: {e!s}",
                ) from e

        @router.get("/list_agent_ids")
        def list_agent_ids():
            r"""Returns a list of all active agent IDs.

            Returns:
                dict: A dictionary containing all registered agent IDs.
            """
            return {"agent_ids": list(self.agents)}

        @router.post("/delete/{agent_id}")
        def delete_agent(agent_id: str):
            r"""Deletes an agent from the server.

            Args:
                agent_id (str): The ID of the agent to delete.

            Returns:
                dict: A confirmation message upon successful deletion.
            """
            # Single pop instead of check-then-delete, so a missing agent
            # is always reported as 404.
            if self.agents.pop(agent_id, None) is None:
                raise HTTPException(status_code=404, detail="Agent not found.")
            return {"message": f"Agent {agent_id} deleted."}

        @router.post("/step/{agent_id}")
        def step_agent(agent_id: str, request: StepRequest):
            r"""Runs one step of synchronous agent response.

            Args:
                agent_id (str): The ID of the target agent.
                request (StepRequest): The input message.

            Returns:
                dict: The model response in serialized form.
            """
            agent = self._get_agent_or_404(agent_id)
            input_message = self._parse_input_message_for_step(
                request.input_message
            )
            format_cls = self._resolve_response_format_for_step(
                request.response_format
            )

            try:
                response = agent.step(
                    input_message=input_message, response_format=format_cls
                )
                return response.model_dump()
            except Exception as e:
                # Route boundary: report any agent failure as a 500 while
                # keeping the original exception as the cause.
                raise HTTPException(
                    status_code=500,
                    detail=f"Unexpected error during step: {e!s}",
                ) from e

        @router.post("/reset/{agent_id}")
        def reset_agent(agent_id: str):
            r"""Clears memory for a specific agent.

            Args:
                agent_id (str): The ID of the agent to reset.

            Returns:
                dict: A message confirming reset success.
            """
            self._get_agent_or_404(agent_id).reset()
            return {"message": f"Agent {agent_id} reset."}

        @router.get("/history/{agent_id}")
        def get_agent_chat_history(agent_id: str):
            r"""Returns the chat history of an agent.

            Args:
                agent_id (str): The ID of the agent to query.

            Returns:
                list: The list of conversation messages.
            """
            agent = self._get_agent_or_404(
                agent_id, detail=f"Agent {agent_id} not found."
            )
            return agent.chat_history

        # Register all routes to the main FastAPI app
        self.app.include_router(router)

    def get_app(self) -> FastAPI:
        r"""Returns the FastAPI app instance.

        Returns:
            FastAPI: The wrapped application object.
        """
        return self.app
|
camel/toolkits/__init__.py
CHANGED
|
@@ -77,7 +77,7 @@ from .aci_toolkit import ACIToolkit
|
|
|
77
77
|
from .playwright_mcp_toolkit import PlaywrightMCPToolkit
|
|
78
78
|
from .wolfram_alpha_toolkit import WolframAlphaToolkit
|
|
79
79
|
from .task_planning_toolkit import TaskPlanningToolkit
|
|
80
|
-
from .
|
|
80
|
+
from .hybrid_browser_toolkit import HybridBrowserToolkit
|
|
81
81
|
from .edgeone_pages_mcp_toolkit import EdgeOnePagesMCPToolkit
|
|
82
82
|
from .google_drive_mcp_toolkit import GoogleDriveMCPToolkit
|
|
83
83
|
from .craw4ai_toolkit import Crawl4AIToolkit
|
|
@@ -146,7 +146,7 @@ __all__ = [
|
|
|
146
146
|
'WolframAlphaToolkit',
|
|
147
147
|
'BohriumToolkit',
|
|
148
148
|
'TaskPlanningToolkit',
|
|
149
|
-
'
|
|
149
|
+
'HybridBrowserToolkit',
|
|
150
150
|
'EdgeOnePagesMCPToolkit',
|
|
151
151
|
'GoogleDriveMCPToolkit',
|
|
152
152
|
'Crawl4AIToolkit',
|
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
-
from .
|
|
14
|
+
from .hybrid_browser_toolkit import HybridBrowserToolkit
|
|
15
15
|
|
|
16
16
|
__all__ = [
|
|
17
|
-
"
|
|
17
|
+
"HybridBrowserToolkit",
|
|
18
18
|
]
|
|
@@ -24,6 +24,7 @@ class ActionExecutor:
|
|
|
24
24
|
# Configuration constants
|
|
25
25
|
DEFAULT_TIMEOUT = 5000 # 5 seconds
|
|
26
26
|
SHORT_TIMEOUT = 2000 # 2 seconds
|
|
27
|
+
MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
|
|
27
28
|
|
|
28
29
|
def __init__(self, page: "Page"):
|
|
29
30
|
self.page = page
|
|
@@ -32,6 +33,7 @@ class ActionExecutor:
|
|
|
32
33
|
# Public helpers
|
|
33
34
|
# ------------------------------------------------------------------
|
|
34
35
|
async def execute(self, action: Dict[str, Any]) -> str:
|
|
36
|
+
r"""Execute an action and return the result description."""
|
|
35
37
|
if not action:
|
|
36
38
|
return "No action to execute"
|
|
37
39
|
|
|
@@ -64,32 +66,46 @@ class ActionExecutor:
|
|
|
64
66
|
# Internal handlers
|
|
65
67
|
# ------------------------------------------------------------------
|
|
66
68
|
async def _click(self, action: Dict[str, Any]) -> str:
|
|
69
|
+
r"""Handle click actions with multiple fallback strategies."""
|
|
67
70
|
ref = action.get("ref")
|
|
68
71
|
text = action.get("text")
|
|
69
72
|
selector = action.get("selector")
|
|
70
73
|
if not (ref or text or selector):
|
|
71
74
|
return "Error: click requires ref/text/selector"
|
|
72
75
|
|
|
76
|
+
# Build strategies in priority order: ref > selector > text
|
|
73
77
|
strategies = []
|
|
78
|
+
if ref:
|
|
79
|
+
strategies.append(f"[aria-ref='{ref}']")
|
|
74
80
|
if selector:
|
|
75
81
|
strategies.append(selector)
|
|
76
82
|
if text:
|
|
77
83
|
strategies.append(f'text="{text}"')
|
|
78
|
-
if ref:
|
|
79
|
-
strategies.append(f"[aria-ref='{ref}']")
|
|
80
84
|
|
|
85
|
+
# Strategy 1: Try Playwright force click for each selector
|
|
81
86
|
for sel in strategies:
|
|
82
87
|
try:
|
|
83
88
|
if await self.page.locator(sel).count() > 0:
|
|
84
89
|
await self.page.click(
|
|
85
|
-
sel, timeout=self.
|
|
90
|
+
sel, timeout=self.DEFAULT_TIMEOUT, force=True
|
|
86
91
|
)
|
|
87
|
-
return f"Clicked element via {sel}"
|
|
92
|
+
return f"Clicked element via force: {sel}"
|
|
88
93
|
except Exception:
|
|
89
|
-
|
|
90
|
-
|
|
94
|
+
continue
|
|
95
|
+
|
|
96
|
+
# Strategy 2: Try JavaScript click as fallback
|
|
97
|
+
for sel in strategies:
|
|
98
|
+
try:
|
|
99
|
+
await self.page.locator(sel).first.evaluate("el => el.click()")
|
|
100
|
+
await asyncio.sleep(0.1) # Brief wait for effects
|
|
101
|
+
return f"Clicked element via JS: {sel}"
|
|
102
|
+
except Exception:
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
return "Error: All click strategies failed"
|
|
91
106
|
|
|
92
107
|
async def _type(self, action: Dict[str, Any]) -> str:
|
|
108
|
+
r"""Handle typing text into input fields."""
|
|
93
109
|
ref = action.get("ref")
|
|
94
110
|
selector = action.get("selector")
|
|
95
111
|
text = action.get("text", "")
|
|
@@ -103,6 +119,7 @@ class ActionExecutor:
|
|
|
103
119
|
return f"Type failed: {exc}"
|
|
104
120
|
|
|
105
121
|
async def _select(self, action: Dict[str, Any]) -> str:
|
|
122
|
+
r"""Handle selecting options from dropdowns."""
|
|
106
123
|
ref = action.get("ref")
|
|
107
124
|
selector = action.get("selector")
|
|
108
125
|
value = action.get("value", "")
|
|
@@ -118,8 +135,9 @@ class ActionExecutor:
|
|
|
118
135
|
return f"Select failed: {exc}"
|
|
119
136
|
|
|
120
137
|
async def _wait(self, action: Dict[str, Any]) -> str:
|
|
138
|
+
r"""Handle wait actions."""
|
|
121
139
|
if "timeout" in action:
|
|
122
|
-
ms = action["timeout"]
|
|
140
|
+
ms = int(action["timeout"])
|
|
123
141
|
await asyncio.sleep(ms / 1000)
|
|
124
142
|
return f"Waited {ms}ms"
|
|
125
143
|
if "selector" in action:
|
|
@@ -131,6 +149,7 @@ class ActionExecutor:
|
|
|
131
149
|
return "Error: wait requires timeout/selector"
|
|
132
150
|
|
|
133
151
|
async def _extract(self, action: Dict[str, Any]) -> str:
|
|
152
|
+
r"""Handle text extraction from elements."""
|
|
134
153
|
ref = action.get("ref")
|
|
135
154
|
if not ref:
|
|
136
155
|
return "Error: extract requires ref"
|
|
@@ -140,6 +159,7 @@ class ActionExecutor:
|
|
|
140
159
|
return f"Extracted: {txt[:100] if txt else 'None'}"
|
|
141
160
|
|
|
142
161
|
async def _scroll(self, action: Dict[str, Any]) -> str:
|
|
162
|
+
r"""Handle page scrolling with safe parameter validation."""
|
|
143
163
|
direction = action.get("direction", "down")
|
|
144
164
|
amount = action.get("amount", 300)
|
|
145
165
|
|
|
@@ -151,18 +171,22 @@ class ActionExecutor:
|
|
|
151
171
|
# Safely convert amount to integer and clamp to reasonable range
|
|
152
172
|
amount_int = int(amount)
|
|
153
173
|
amount_int = max(
|
|
154
|
-
-
|
|
155
|
-
|
|
174
|
+
-self.MAX_SCROLL_AMOUNT,
|
|
175
|
+
min(self.MAX_SCROLL_AMOUNT, amount_int),
|
|
176
|
+
) # Clamp to MAX_SCROLL_AMOUNT range
|
|
156
177
|
except (ValueError, TypeError):
|
|
157
178
|
return "Error: amount must be a valid number"
|
|
158
179
|
|
|
159
180
|
# Use safe evaluation with bound parameters
|
|
160
181
|
scroll_offset = amount_int if direction == "down" else -amount_int
|
|
161
|
-
await self.page.evaluate(
|
|
182
|
+
await self.page.evaluate(
|
|
183
|
+
"offset => window.scrollBy(0, offset)", scroll_offset
|
|
184
|
+
)
|
|
162
185
|
await asyncio.sleep(0.5)
|
|
163
186
|
return f"Scrolled {direction} by {abs(amount_int)}px"
|
|
164
187
|
|
|
165
188
|
async def _enter(self, action: Dict[str, Any]) -> str:
|
|
189
|
+
r"""Handle Enter key press actions."""
|
|
166
190
|
ref = action.get("ref")
|
|
167
191
|
selector = action.get("selector")
|
|
168
192
|
if ref:
|
|
@@ -175,16 +199,28 @@ class ActionExecutor:
|
|
|
175
199
|
|
|
176
200
|
# utilities
|
|
177
201
|
async def _wait_dom_stable(self) -> None:
|
|
202
|
+
r"""Wait for DOM to become stable before executing actions."""
|
|
178
203
|
try:
|
|
204
|
+
# Wait for basic DOM content loading
|
|
179
205
|
await self.page.wait_for_load_state(
|
|
180
206
|
'domcontentloaded', timeout=self.SHORT_TIMEOUT
|
|
181
207
|
)
|
|
208
|
+
|
|
209
|
+
# Try to wait for network idle briefly
|
|
210
|
+
try:
|
|
211
|
+
await self.page.wait_for_load_state(
|
|
212
|
+
'networkidle', timeout=self.SHORT_TIMEOUT
|
|
213
|
+
)
|
|
214
|
+
except Exception:
|
|
215
|
+
pass # Network idle is optional
|
|
216
|
+
|
|
182
217
|
except Exception:
|
|
183
|
-
pass
|
|
218
|
+
pass # Don't fail if wait times out
|
|
184
219
|
|
|
185
220
|
# static helpers
|
|
186
221
|
@staticmethod
|
|
187
222
|
def should_update_snapshot(action: Dict[str, Any]) -> bool:
|
|
223
|
+
r"""Determine if an action requires a snapshot update."""
|
|
188
224
|
change_types = {
|
|
189
225
|
"click",
|
|
190
226
|
"type",
|
|
@@ -12,24 +12,24 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
import json
|
|
15
|
-
import logging
|
|
16
15
|
import re
|
|
17
16
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
18
17
|
|
|
18
|
+
from camel.logger import get_logger
|
|
19
19
|
from camel.models import BaseModelBackend, ModelFactory
|
|
20
20
|
from camel.types import ModelPlatformType, ModelType
|
|
21
21
|
|
|
22
22
|
from .actions import ActionExecutor
|
|
23
|
-
from .
|
|
23
|
+
from .browser_session import NVBrowserSession
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from camel.agents import ChatAgent
|
|
27
27
|
|
|
28
|
-
logger =
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class PlaywrightLLMAgent:
|
|
32
|
-
"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
|
|
32
|
+
r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
|
|
33
33
|
|
|
34
34
|
# System prompt as class constant to avoid recreation
|
|
35
35
|
SYSTEM_PROMPT = """
|
|
@@ -90,8 +90,8 @@ what was accomplished
|
|
|
90
90
|
self.action_history: List[Dict[str, Any]] = []
|
|
91
91
|
if model_backend is None:
|
|
92
92
|
model_backend = ModelFactory.create(
|
|
93
|
-
model_platform=ModelPlatformType.
|
|
94
|
-
model_type=ModelType.
|
|
93
|
+
model_platform=ModelPlatformType.DEFAULT,
|
|
94
|
+
model_type=ModelType.DEFAULT,
|
|
95
95
|
model_config_dict={"temperature": 0, "top_p": 1},
|
|
96
96
|
)
|
|
97
97
|
self.model_backend = model_backend
|
|
@@ -99,16 +99,19 @@ what was accomplished
|
|
|
99
99
|
self._chat_agent: Optional[ChatAgent] = None
|
|
100
100
|
|
|
101
101
|
async def navigate(self, url: str) -> str:
|
|
102
|
+
r"""Navigate to a URL and return the snapshot."""
|
|
102
103
|
try:
|
|
103
104
|
# NVBrowserSession handles waits internally
|
|
104
105
|
logger.debug("Navigated to URL: %s", url)
|
|
105
106
|
await self._session.visit(url)
|
|
106
107
|
return await self._session.get_snapshot(force_refresh=True)
|
|
107
108
|
except Exception as exc:
|
|
108
|
-
|
|
109
|
+
error_msg = f"Error: could not navigate to {url} - {exc}"
|
|
110
|
+
logger.error(error_msg)
|
|
111
|
+
return error_msg
|
|
109
112
|
|
|
110
113
|
def _get_chat_agent(self) -> "ChatAgent":
|
|
111
|
-
"""Get or create the ChatAgent instance."""
|
|
114
|
+
r"""Get or create the ChatAgent instance."""
|
|
112
115
|
from camel.agents import ChatAgent
|
|
113
116
|
|
|
114
117
|
if self._chat_agent is None:
|
|
@@ -165,12 +168,16 @@ what was accomplished
|
|
|
165
168
|
logger.warning(
|
|
166
169
|
"Could not parse JSON from LLM response: %s", content[:200]
|
|
167
170
|
)
|
|
171
|
+
return self._get_fallback_response("Parsing error")
|
|
172
|
+
|
|
173
|
+
def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
|
|
174
|
+
r"""Generate a fallback response structure."""
|
|
168
175
|
return {
|
|
169
|
-
"plan": ["Could not parse response"],
|
|
176
|
+
"plan": [f"Could not parse response: {error_msg}"],
|
|
170
177
|
"action": {
|
|
171
178
|
"type": "finish",
|
|
172
179
|
"ref": None,
|
|
173
|
-
"summary": "Parsing error",
|
|
180
|
+
"summary": f"Parsing error: {error_msg}",
|
|
174
181
|
},
|
|
175
182
|
}
|
|
176
183
|
|
|
@@ -181,7 +188,7 @@ what was accomplished
|
|
|
181
188
|
is_initial: bool,
|
|
182
189
|
history: Optional[List[Dict[str, Any]]] = None,
|
|
183
190
|
) -> Dict[str, Any]:
|
|
184
|
-
"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
|
|
191
|
+
r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
|
|
185
192
|
# Build user message
|
|
186
193
|
if is_initial:
|
|
187
194
|
user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
|
|
@@ -208,6 +215,7 @@ what was accomplished
|
|
|
208
215
|
return self._safe_parse_json(content)
|
|
209
216
|
|
|
210
217
|
async def process_command(self, prompt: str, max_steps: int = 15):
|
|
218
|
+
r"""Process a command using LLM-guided browser automation."""
|
|
211
219
|
# initial full snapshot
|
|
212
220
|
full_snapshot = await self._session.get_snapshot()
|
|
213
221
|
assert self._session.snapshot is not None
|
|
@@ -270,9 +278,11 @@ what was accomplished
|
|
|
270
278
|
logger.info("Process completed with %d steps", steps)
|
|
271
279
|
|
|
272
280
|
async def _run_action(self, action: Dict[str, Any]) -> str:
|
|
281
|
+
r"""Execute a single action and return the result."""
|
|
273
282
|
if action.get("type") == "navigate":
|
|
274
283
|
return await self.navigate(action.get("url", ""))
|
|
275
284
|
return await self._session.exec_action(action)
|
|
276
285
|
|
|
277
286
|
async def close(self):
|
|
287
|
+
r"""Clean up browser session and resources."""
|
|
278
288
|
await self._session.close()
|