camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (32) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/chat_agent.py +297 -16
  4. camel/interpreters/docker_interpreter.py +3 -2
  5. camel/loaders/base_loader.py +85 -0
  6. camel/messages/base.py +2 -6
  7. camel/services/agent_openapi_server.py +380 -0
  8. camel/societies/workforce/workforce.py +144 -33
  9. camel/toolkits/__init__.py +7 -4
  10. camel/toolkits/craw4ai_toolkit.py +2 -2
  11. camel/toolkits/file_write_toolkit.py +6 -6
  12. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
  13. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
  14. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
  15. camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1008 -0
  17. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
  18. camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +202 -23
  19. camel/toolkits/note_taking_toolkit.py +90 -0
  20. camel/toolkits/openai_image_toolkit.py +292 -0
  21. camel/toolkits/slack_toolkit.py +4 -4
  22. camel/toolkits/terminal_toolkit.py +223 -73
  23. camel/types/agents/tool_calling_record.py +4 -1
  24. camel/types/enums.py +24 -24
  25. camel/utils/mcp_client.py +37 -1
  26. camel/utils/tool_result.py +44 -0
  27. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/METADATA +58 -5
  28. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/RECORD +30 -26
  29. camel/toolkits/dalle_toolkit.py +0 -175
  30. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
  31. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/WHEEL +0 -0
  32. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/licenses/LICENSE +0 -0
@@ -1,446 +0,0 @@
1
- # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
- # Licensed under the Apache License, Version 2.0 (the "License");
3
- # you may not use this file except in compliance with the License.
4
- # You may obtain a copy of the License at
5
- #
6
- # http://www.apache.org/licenses/LICENSE-2.0
7
- #
8
- # Unless required by applicable law or agreed to in writing, software
9
- # distributed under the License is distributed on an "AS IS" BASIS,
10
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
- # See the License for the specific language governing permissions and
12
- # limitations under the License.
13
- # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- from __future__ import annotations
15
-
16
- from typing import Any, Dict, List, Optional
17
-
18
- from camel.logger import get_logger
19
- from camel.models import BaseModelBackend
20
- from camel.toolkits.base import BaseToolkit
21
- from camel.toolkits.function_tool import FunctionTool
22
-
23
- from .agent import PlaywrightLLMAgent
24
- from .nv_browser_session import NVBrowserSession
25
-
26
- logger = get_logger(__name__)
27
-
28
-
29
class BrowserNonVisualToolkit(BaseToolkit):
    r"""A lightweight, *non-visual* browser toolkit exposing primitive
    Playwright actions as CAMEL `FunctionTool`s.

    Args:
        headless (bool): Forwarded as the ``headless`` option to the
            underlying browser session and, when created, to the
            high-level agent. (default: :obj:`True`)
        user_data_dir (Optional[str]): Forwarded as the ``user_data_dir``
            option to the underlying browser session and, when created, to
            the high-level agent. (default: :obj:`None`)
        web_agent_model (Optional[BaseModelBackend]): Model backend used to
            build the high-level `PlaywrightLLMAgent`. When it is
            :obj:`None`, `solve_task` is unavailable and is not exposed as
            a tool. (default: :obj:`None`)
    """

    def __init__(
        self,
        *,
        headless: bool = True,
        user_data_dir: Optional[str] = None,
        web_agent_model: Optional[BaseModelBackend] = None,
    ) -> None:
        super().__init__()
        self._headless = headless
        self._user_data_dir = user_data_dir
        # Read by `_ensure_agent()` and `get_tools()`; `solve_task` is only
        # offered when a model is supplied.
        self.web_agent_model = web_agent_model

        # Encapsulated browser session
        self._session = NVBrowserSession(
            headless=headless, user_data_dir=user_data_dir
        )

        # Optional higher-level agent, created lazily by `_ensure_agent()`
        self._agent: Optional[PlaywrightLLMAgent] = None

    def __del__(self):
        r"""Best-effort cleanup when toolkit is garbage collected.

        1. We *avoid* running during the Python interpreter shutdown phase
           (`sys.is_finalizing()`), because the import machinery and/or
           event loop may already be torn down which leads to noisy
           exceptions such as `ImportError: sys.meta_path is None` or
           `RuntimeError: Event loop is closed`.
        2. We protect all imports and event-loop operations with defensive
           `try/except` blocks. This ensures that, even if cleanup cannot
           be carried out, we silently ignore the failure instead of
           polluting stderr on program exit.
        """
        try:
            # Imports are deliberately local and inside the `try`: see the
            # docstring for why they may fail at this point.
            import sys

            if getattr(sys, "is_finalizing", lambda: False)():
                return  # Skip cleanup during interpreter shutdown

            import asyncio

            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                # No event loop in current thread -> nothing to clean
                return

            if loop.is_closed():
                # The default loop is closed, create a *temporary* loop
                # just for cleanup so that Playwright / asyncio transports
                # are gracefully shut down. This avoids noisy warnings such
                # as "RuntimeError: Event loop is closed" when the program
                # exits.
                try:
                    tmp_loop = asyncio.new_event_loop()
                    try:
                        asyncio.set_event_loop(tmp_loop)
                        tmp_loop.run_until_complete(self.close_browser())
                    finally:
                        # Best-effort shutdown of async generators and the
                        # loop itself.
                        if hasattr(tmp_loop, "shutdown_asyncgens"):
                            tmp_loop.run_until_complete(
                                tmp_loop.shutdown_asyncgens()
                            )
                        tmp_loop.close()
                finally:
                    # Ensure no subsequent get_event_loop() picks up a now
                    # closed temporary loop.
                    asyncio.set_event_loop(None)
                return

            if loop.is_running():
                try:
                    task = loop.create_task(self.close_browser())
                    del task  # Fire-and-forget
                except RuntimeError:
                    # Loop is running but not in this thread -> ignore
                    pass
            else:
                # Own the loop -> safe to run
                asyncio.run(self.close_browser())
        except Exception:
            # Suppress *all* errors during garbage collection
            pass

    async def _ensure_browser(self):
        r"""Ask the underlying session to make sure a browser is
        available."""
        await self._session.ensure_browser()

    async def _require_page(self):
        r"""Ensure the browser is available and return the session's
        current page object."""
        await self._session.ensure_browser()
        return await self._session.get_page()

    def _validate_ref(self, ref: str, method_name: str) -> None:
        r"""Validate that the ``ref`` parameter is a non-empty string.

        Args:
            ref (str): The element reference supplied by the caller.
            method_name (str): Name of the calling tool method, used only
                to prefix the error message.

        Raises:
            ValueError: If ``ref`` is empty or is not a string.
        """
        if not ref or not isinstance(ref, str):
            message = (
                f"{method_name}(): 'ref' must be a non-empty string, "
                f"got: {ref}"
            )
            logger.error(message)
            # Logging alone let the action run with an unusable reference;
            # stop here, mirroring the URL validation in `visit_page`.
            raise ValueError(message)

    async def open_browser(
        self, start_url: Optional[str] = None
    ) -> Dict[str, str]:
        r"""Launches a new browser session. This should be the first step.

        Args:
            start_url (Optional[str]): The URL to navigate to after the
                browser launches. If not provided, the browser will open
                with a blank page. (default: :obj:`None`)

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action and a snapshot of the page. The keys are "result"
                and "snapshot". The "snapshot" is a YAML-like
                representation of the page's DOM structure, including
                element references (e.g., "e3") that can be used in other
                tool calls.
        """
        await self._session.ensure_browser()
        if start_url:
            return await self.visit_page(start_url)
        # If no start_url provided, still capture initial snapshot
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        return {"result": "Browser session started.", "snapshot": snapshot}

    async def close_browser(self) -> str:
        r"""Closes the current browser session, freeing up all associated
        resources. This should be called when the browsing task is
        complete.

        Returns:
            str: A confirmation message indicating the session has been
                closed.
        """
        # `getattr` keeps this safe when invoked from `__del__` on an
        # instance whose `__init__` did not run to completion.
        agent = getattr(self, "_agent", None)
        if agent is not None:
            try:
                await agent.close()
            except Exception as exc:
                # Agent cleanup stays best-effort, but leave a trace
                # instead of discarding the failure silently.
                logger.warning(
                    f"close_browser(): agent cleanup failed: {exc}"
                )
            self._agent = None

        # NOTE(review): this is a class-level call, so it presumably closes
        # sessions beyond this toolkit's own `_session` - confirm against
        # NVBrowserSession.
        await NVBrowserSession.close_all_sessions()
        return "Browser session closed."

    async def visit_page(self, url: str) -> Dict[str, str]:
        r"""Navigates the current browser page to a new URL.

        Args:
            url (str): The URL to navigate to. Must be a fully qualified
                URL (e.g., "https://www.google.com").

        Returns:
            Dict[str, str]: A dictionary containing the navigation result
                and a snapshot of the new page. The keys are "result" and
                "snapshot". The "snapshot" provides a fresh view of the
                page's DOM structure.

        Raises:
            ValueError: If ``url`` is empty or is not a string.
        """
        if not url or not isinstance(url, str):
            raise ValueError("visit_page(): 'url' must be a non-empty string")

        nav_result = await self._session.visit(url)
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        return {"result": nav_result, "snapshot": snapshot}

    async def get_page_snapshot(
        self, *, force_refresh: bool = False, diff_only: bool = False
    ) -> str:
        r"""Capture a YAML-like structural snapshot of the DOM.

        Args:
            force_refresh (bool): When ``True`` always re-generate the
                snapshot even if the URL has not changed. (default:
                :obj:`False`)
            diff_only (bool): When ``True`` return only the diff relative
                to the previous snapshot. (default: :obj:`False`)

        Returns:
            str: Formatted snapshot string.
        """
        return await self._session.get_snapshot(
            force_refresh=force_refresh, diff_only=diff_only
        )

    async def click(self, *, ref: str) -> Dict[str, str]:
        r"""Performs a click action on a specified element on the current
        page.

        Args:
            ref (str): The reference ID of the element to click. This ID is
                obtained from the page snapshot (e.g., "e12").

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action. If the click causes a change in the page's
                structure, a "snapshot" key will be included with a diff
                of the changes.

        Raises:
            ValueError: If ``ref`` is empty or is not a string.
        """
        self._validate_ref(ref, "click")

        action: Dict[str, Any] = {"type": "click", "ref": ref}
        return await self._exec_with_snapshot(action)

    async def type(self, *, ref: str, text: str) -> Dict[str, str]:
        r"""Types text into an input field or textarea on the current page.

        Args:
            ref (str): The reference ID of the input element. This ID is
                obtained from the page snapshot (e.g., "e25").
            text (str): The text to be typed into the element.

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action under the key "result". A "snapshot" key with a
                diff is added when the action is one that updates the
                snapshot and the page structure changed.

        Raises:
            ValueError: If ``ref`` is empty or is not a string.
        """
        self._validate_ref(ref, "type")

        action: Dict[str, Any] = {"type": "type", "ref": ref, "text": text}
        return await self._exec_with_snapshot(action)

    async def select(self, *, ref: str, value: str) -> Dict[str, str]:
        r"""Selects an option from a dropdown (<select>) element on the
        page.

        Args:
            ref (str): The reference ID of the <select> element. This ID is
                obtained from the page snapshot.
            value (str): The value of the option to be selected. This
                should match the 'value' attribute of an <option> tag.

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action under the key "result". A "snapshot" key with a
                diff is added when the action is one that updates the
                snapshot and the page structure changed.

        Raises:
            ValueError: If ``ref`` is empty or is not a string.
        """
        self._validate_ref(ref, "select")

        action: Dict[str, Any] = {"type": "select", "ref": ref, "value": value}
        return await self._exec_with_snapshot(action)

    async def scroll(self, *, direction: str, amount: int) -> Dict[str, str]:
        r"""Scrolls the current page up or down by a specified amount.

        Args:
            direction (str): The direction to scroll. Must be either "up"
                or "down".
            amount (int): The number of pixels to scroll.

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action under the key "result". For an invalid direction
                the "result" holds an error message and no action is
                executed.
        """
        if direction not in ("up", "down"):
            logger.error("scroll(): 'direction' must be 'up' or 'down'")
            return {
                "result": "scroll() Error: 'direction' must be 'up' or 'down'"
            }

        action: Dict[str, Any] = {
            "type": "scroll",
            "direction": direction,
            "amount": amount,
        }
        return await self._exec_with_snapshot(action)

    async def enter(self, *, ref: str) -> Dict[str, str]:
        r"""Simulates pressing the Enter key on a specific element.
        This is often used to submit forms.

        Args:
            ref (str): The reference ID of the element to focus before
                pressing Enter. This ID is obtained from the page snapshot.

        Returns:
            Dict[str, str]: A dictionary containing the result of the
                action. If pressing Enter causes a page navigation or DOM
                change, a "snapshot" key will be included with a diff of
                the changes.

        Raises:
            ValueError: If ``ref`` is empty or is not a string.
        """
        self._validate_ref(ref, "enter")

        action: Dict[str, Any] = {"type": "enter", "ref": ref}
        return await self._exec_with_snapshot(action)

    async def wait_user(
        self,
        timeout_sec: Optional[float] = None,
    ) -> Dict[str, str]:
        r"""Pauses the agent's execution and waits for human intervention.
        This is useful for tasks that require manual steps, like solving a
        CAPTCHA. The agent will print a message and wait for the user to
        press the Enter key in the console.

        Args:
            timeout_sec (Optional[float]): The maximum time in seconds to
                wait for the user. If `None`, it will wait indefinitely.
                (default: :obj:`None`)

        Returns:
            Dict[str, str]: A dictionary containing a result message and a
                full snapshot of the current page after the user has acted.
                The keys are "result" and "snapshot".
        """

        import asyncio

        prompt = (
            "🕑 Agent is waiting for human input. "
            "Complete the required action in the browser, then press Enter "
            "to continue..."
        )

        logger.info(f"\n{prompt}\n")

        async def _await_enter():
            # `input` blocks, so run it in a worker thread to keep the
            # event loop responsive.
            await asyncio.to_thread(input, ">>> Press Enter to resume <<<\n")

        try:
            # `wait_for` with `timeout=None` simply waits for completion,
            # so one call covers both the bounded and unbounded cases.
            await asyncio.wait_for(_await_enter(), timeout=timeout_sec)
            result_msg = "User resumed."
        except asyncio.TimeoutError:
            # NOTE(review): the worker thread running `input` cannot be
            # interrupted and presumably stays blocked until Enter is
            # pressed - confirm whether that matters to callers.
            result_msg = f"Timeout {timeout_sec}s reached, auto-resumed."

        snapshot = await self._session.get_snapshot(
            force_refresh=True,
            diff_only=False,
        )

        return {"result": result_msg, "snapshot": snapshot}

    # Helper to run through ActionExecutor
    async def _exec(self, action: Dict[str, Any]) -> str:
        r"""Execute a single action dictionary through the session and
        return its result message."""
        return await self._session.exec_action(action)

    async def _exec_with_snapshot(
        self, action: Dict[str, Any]
    ) -> Dict[str, str]:
        r"""Execute action and, if DOM structure changed, include snapshot
        diff.

        Args:
            action (Dict[str, Any]): The action description passed to the
                session, e.g. ``{"type": "click", "ref": "e12"}``.

        Returns:
            Dict[str, str]: Always contains "result". Contains "snapshot"
                (a diff against the previous snapshot) only when the action
                type warrants a snapshot update and the diff reports
                structural changes.
        """
        result = await self._exec(action)

        # Only capture diff if action type typically changes DOM
        from .actions import ActionExecutor

        if not ActionExecutor.should_update_snapshot(action):
            return {"result": result}

        # Capture structural diff to previous snapshot
        diff = await self._session.get_snapshot(
            force_refresh=True, diff_only=True
        )

        # The session marks an unchanged page with this exact prefix.
        if diff.startswith("- Page Snapshot (no structural changes)"):
            return {"result": result}

        return {"result": result, "snapshot": diff}

    def _ensure_agent(self) -> PlaywrightLLMAgent:
        r"""Create PlaywrightLLMAgent on first use if `web_agent_model`
        provided.

        Returns:
            PlaywrightLLMAgent: The cached agent instance.

        Raises:
            RuntimeError: If no `web_agent_model` was supplied.
        """
        if self.web_agent_model is None:
            raise RuntimeError(
                "web_agent_model not supplied - high-level task planning is "
                "unavailable."
            )

        if self._agent is None:
            self._agent = PlaywrightLLMAgent(
                headless=self._headless,
                user_data_dir=self._user_data_dir,
                model_backend=self.web_agent_model,
            )
        return self._agent

    async def solve_task(
        self, task_prompt: str, start_url: str, max_steps: int = 15
    ) -> str:
        r"""Use LLM agent to autonomously complete the task (requires
        `web_agent_model`).

        Args:
            task_prompt (str): The task prompt to complete.
            start_url (str): The URL to navigate to.
            max_steps (int): The maximum number of steps to take.
                (default: :obj:`15`)

        Returns:
            str: A fixed completion message; the detailed trace is not
                part of the return value.

        Raises:
            RuntimeError: If no `web_agent_model` was supplied.
        """

        agent = self._ensure_agent()
        await agent.navigate(start_url)
        await agent.process_command(task_prompt, max_steps=max_steps)
        return "Task processing finished - see stdout for detailed trace."

    def get_tools(self) -> List[FunctionTool]:
        r"""Return the toolkit's methods wrapped as `FunctionTool` objects.

        Returns:
            List[FunctionTool]: The primitive browser tools, plus
                `solve_task` when a `web_agent_model` was supplied.
        """
        # NOTE(review): `get_page_snapshot` is not registered here -
        # confirm whether that omission is intentional.
        base_tools = [
            FunctionTool(self.open_browser),
            FunctionTool(self.close_browser),
            FunctionTool(self.visit_page),
            FunctionTool(self.click),
            FunctionTool(self.type),
            FunctionTool(self.select),
            FunctionTool(self.scroll),
            FunctionTool(self.enter),
            FunctionTool(self.wait_user),
        ]

        if self.web_agent_model is not None:
            base_tools.append(FunctionTool(self.solve_task))

        return base_tools