camel-ai 0.2.68__py3-none-any.whl → 0.2.69a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (38) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +170 -11
  3. camel/configs/vllm_config.py +2 -0
  4. camel/datagen/self_improving_cot.py +1 -1
  5. camel/memories/context_creators/score_based.py +129 -87
  6. camel/runtimes/configs.py +11 -11
  7. camel/runtimes/daytona_runtime.py +4 -4
  8. camel/runtimes/docker_runtime.py +6 -6
  9. camel/runtimes/remote_http_runtime.py +5 -5
  10. camel/societies/workforce/prompts.py +13 -12
  11. camel/societies/workforce/single_agent_worker.py +263 -26
  12. camel/societies/workforce/utils.py +10 -2
  13. camel/societies/workforce/worker.py +21 -45
  14. camel/societies/workforce/workforce.py +43 -17
  15. camel/tasks/task.py +18 -12
  16. camel/toolkits/__init__.py +2 -0
  17. camel/toolkits/aci_toolkit.py +19 -19
  18. camel/toolkits/arxiv_toolkit.py +6 -6
  19. camel/toolkits/dappier_toolkit.py +5 -5
  20. camel/toolkits/file_write_toolkit.py +10 -10
  21. camel/toolkits/function_tool.py +4 -3
  22. camel/toolkits/github_toolkit.py +3 -3
  23. camel/toolkits/non_visual_browser_toolkit/__init__.py +18 -0
  24. camel/toolkits/non_visual_browser_toolkit/actions.py +196 -0
  25. camel/toolkits/non_visual_browser_toolkit/agent.py +278 -0
  26. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +363 -0
  27. camel/toolkits/non_visual_browser_toolkit/nv_browser_session.py +175 -0
  28. camel/toolkits/non_visual_browser_toolkit/snapshot.js +188 -0
  29. camel/toolkits/non_visual_browser_toolkit/snapshot.py +164 -0
  30. camel/toolkits/pptx_toolkit.py +4 -4
  31. camel/toolkits/sympy_toolkit.py +1 -1
  32. camel/toolkits/task_planning_toolkit.py +3 -3
  33. camel/toolkits/thinking_toolkit.py +1 -1
  34. camel/toolkits/video_analysis_toolkit.py +77 -3
  35. {camel_ai-0.2.68.dist-info → camel_ai-0.2.69a2.dist-info}/METADATA +5 -1
  36. {camel_ai-0.2.68.dist-info → camel_ai-0.2.69a2.dist-info}/RECORD +38 -31
  37. {camel_ai-0.2.68.dist-info → camel_ai-0.2.69a2.dist-info}/WHEEL +0 -0
  38. {camel_ai-0.2.68.dist-info → camel_ai-0.2.69a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,363 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from __future__ import annotations
15
+
16
+ from typing import Any, Dict, List, Optional
17
+
18
+ from camel.models import BaseModelBackend
19
+ from camel.toolkits.base import BaseToolkit
20
+ from camel.toolkits.function_tool import FunctionTool
21
+
22
+ from .agent import PlaywrightLLMAgent
23
+
24
+ # session wrapper
25
+ from .nv_browser_session import NVBrowserSession
26
+
27
+
28
class BrowserNonVisualToolkit(BaseToolkit):
    r"""A lightweight, *non-visual* browser toolkit exposing primitive
    Playwright actions as CAMEL `FunctionTool`s.

    All browser work is delegated to an :obj:`NVBrowserSession`; the methods
    of this class validate their arguments, build action dictionaries and
    attach DOM snapshots to the results.

    Args:
        headless (bool): Forwarded to the browser session.
            (default: :obj:`True`)
        user_data_dir (Optional[str]): Forwarded to the browser session.
            (default: :obj:`None`)
        web_agent_model (Optional[BaseModelBackend]): Model backend for the
            optional `PlaywrightLLMAgent`. When :obj:`None`, `solve_task` is
            not registered as a tool. (default: :obj:`None`)
    """

    # Strong references to cleanup tasks scheduled from ``__del__``. The
    # event loop only keeps weak references to tasks, so a task nobody else
    # references could be garbage collected before it finishes.
    _cleanup_tasks: set = set()

    def __init__(
        self,
        *,
        headless: bool = True,
        user_data_dir: Optional[str] = None,
        web_agent_model: Optional[BaseModelBackend] = None,
    ) -> None:
        super().__init__()
        self._headless = headless
        self._user_data_dir = user_data_dir
        # Only consulted by `_ensure_agent` and `get_tools`.
        self.web_agent_model = web_agent_model

        # Encapsulated browser session
        self._session = NVBrowserSession(
            headless=headless, user_data_dir=user_data_dir
        )

        # Optional higher-level agent, created lazily by `_ensure_agent`
        self._agent: Optional[PlaywrightLLMAgent] = None

    def __del__(self):
        r"""Best-effort cleanup when the toolkit is garbage collected.

        `__del__` cannot be async: when an event loop is running in the
        current thread the cleanup is scheduled on it, otherwise it is run
        to completion on a temporary loop.
        """
        import asyncio

        # `__init__` may have failed before the session was created.
        if getattr(self, "_session", None) is None:
            return

        try:
            try:
                loop = asyncio.get_running_loop()
            except RuntimeError:
                # No loop is running in this thread, so it is safe to
                # block on a fresh one.
                asyncio.run(self.close_browser())
            else:
                # Don't wait for completion to avoid blocking, but keep the
                # task alive until it is done.
                task = loop.create_task(self.close_browser())
                pending = type(self)._cleanup_tasks
                pending.add(task)
                task.add_done_callback(pending.discard)
        except Exception:
            pass  # Don't fail during garbage collection

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    async def _ensure_browser(self):
        r"""Make sure the underlying browser session is started."""
        await self._session.ensure_browser()

    async def _require_page(self):
        r"""Start the session if needed and return its page object."""
        await self._session.ensure_browser()
        return await self._session.get_page()

    def _validate_ref(self, ref: str, method_name: str) -> None:
        r"""Validate that the ``ref`` parameter is a non-empty string.

        Args:
            ref (str): Element reference ID to validate.
            method_name (str): Name of the calling tool, used in the error
                message.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        if not ref or not isinstance(ref, str):
            raise ValueError(
                f"{method_name}(): 'ref' must be a non-empty string, "
                f"got: {ref}"
            )

    # ------------------------------------------------------------------
    # Tool implementations
    # ------------------------------------------------------------------
    async def open_browser(
        self, start_url: Optional[str] = None
    ) -> Dict[str, str]:
        r"""Launch a Playwright browser session.

        Args:
            start_url (Optional[str]): If provided, the page will navigate to
                this URL immediately after the browser launches.

        Returns:
            Dict[str, str]: Keys: ``result`` for action outcome,
                ``snapshot`` for full DOM snapshot.
        """
        await self._session.ensure_browser()
        if start_url:
            return await self.visit_page(start_url)
        # If no start_url provided, still capture initial snapshot
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        return {"result": "Browser session started.", "snapshot": snapshot}

    async def close_browser(self) -> str:
        r"""Terminate the current browser session and free all resources.

        Returns:
            str: Confirmation message.
        """
        # Close agent if it exists
        if self._agent is not None:
            try:
                await self._agent.close()
            except Exception:
                pass  # Don't fail if agent cleanup fails
            self._agent = None

        # Close session
        await self._session.close()
        return "Browser session closed."

    # Navigation / page state ------------------------------------------------
    async def visit_page(self, url: str) -> Dict[str, str]:
        r"""Navigate the current page to the specified URL.

        Args:
            url (str): The destination URL.

        Returns:
            Dict[str, str]: Keys: ``result`` for action outcome,
                ``snapshot`` for full DOM snapshot.

        Raises:
            ValueError: If ``url`` is empty or not a string.
        """
        if not url or not isinstance(url, str):
            raise ValueError("visit_page(): 'url' must be a non-empty string")

        nav_result = await self._session.visit(url)
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        return {"result": nav_result, "snapshot": snapshot}

    async def get_page_snapshot(
        self, *, force_refresh: bool = False, diff_only: bool = False
    ) -> str:
        r"""Capture a YAML-like structural snapshot of the DOM.

        Args:
            force_refresh (bool): When ``True`` always re-generate the
                snapshot even if the URL has not changed.
            diff_only (bool): If ``True`` return only the diff relative to the
                previous snapshot.

        Returns:
            str: Formatted snapshot string.
        """
        return await self._session.get_snapshot(
            force_refresh=force_refresh, diff_only=diff_only
        )

    # Element-level wrappers -------------------------------------------------
    async def click(self, *, ref: str) -> Dict[str, str]:
        r"""Click an element identified by ``ref``.

        Args:
            ref (str): Element reference ID extracted from snapshot (e.g.
                ``"e3"``).

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        self._validate_ref(ref, "click")

        action: Dict[str, Any] = {"type": "click", "ref": ref}
        return await self._exec_with_snapshot(action)

    async def type(self, *, ref: str, text: str) -> Dict[str, str]:
        r"""Type text into an input or textarea element.

        Args:
            ref (str): Element reference ID extracted from snapshot (e.g.
                ``"e3"``).
            text (str): The text to enter.

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        self._validate_ref(ref, "type")

        action: Dict[str, Any] = {"type": "type", "ref": ref, "text": text}
        return await self._exec_with_snapshot(action)

    async def select(self, *, ref: str, value: str) -> Dict[str, str]:
        r"""Select an option in a ``<select>`` element.

        Args:
            ref (str): Element reference ID.
            value (str): The value / option to select.

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        self._validate_ref(ref, "select")

        action: Dict[str, Any] = {"type": "select", "ref": ref, "value": value}
        return await self._exec_with_snapshot(action)

    async def scroll(self, *, direction: str, amount: int) -> Dict[str, str]:
        r"""Scroll the page.

        Args:
            direction (str): ``"down"`` or ``"up"``.
            amount (int): Pixel distance to scroll.

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.

        Raises:
            ValueError: If ``direction`` is neither ``"up"`` nor ``"down"``.
        """
        if direction not in ("up", "down"):
            raise ValueError("scroll(): 'direction' must be 'up' or 'down'")

        action: Dict[str, Any] = {
            "type": "scroll",
            "direction": direction,
            "amount": amount,
        }
        return await self._exec_with_snapshot(action)

    async def wait(
        self,
        *,
        timeout_ms: Optional[int] = None,
        selector: Optional[str] = None,
    ) -> Dict[str, str]:
        r"""Explicit wait utility.

        Args:
            timeout_ms (Optional[int]): Milliseconds to sleep.
            selector (Optional[str]): Wait until this CSS selector appears
                in DOM.

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.
        """
        # Default to 1 000 ms sleep when no arguments provided
        if timeout_ms is None and selector is None:
            timeout_ms = 1000

        # Only the arguments that were actually supplied are forwarded.
        action: Dict[str, Any] = {"type": "wait"}
        if timeout_ms is not None:
            action["timeout"] = timeout_ms
        if selector is not None:
            action["selector"] = selector
        return await self._exec_with_snapshot(action)

    async def extract(self, *, ref: str) -> Dict[str, str]:
        r"""Extract text content from an element.

        Args:
            ref (str): Element reference ID obtained from snapshot.

        Returns:
            Dict[str, str]: Extracted text or error message.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        self._validate_ref(ref, "extract")
        return await self._exec_with_snapshot({"type": "extract", "ref": ref})

    async def enter(self, *, ref: str) -> Dict[str, str]:
        r"""Press the Enter key.

        Args:
            ref (str): Element reference ID to focus before pressing.

        Returns:
            Dict[str, str]: Key ``result`` for the action outcome; key
                ``snapshot`` with the snapshot diff when one was captured.

        Raises:
            ValueError: If ``ref`` is empty or not a string.
        """
        self._validate_ref(ref, "enter")

        action: Dict[str, Any] = {"type": "enter", "ref": ref}
        return await self._exec_with_snapshot(action)

    # Helper to run through ActionExecutor
    async def _exec(self, action: Dict[str, Any]) -> str:
        r"""Execute ``action`` through the session and return its result."""
        return await self._session.exec_action(action)

    async def _exec_with_snapshot(
        self, action: Dict[str, Any]
    ) -> Dict[str, str]:
        r"""Execute action and, if DOM structure changed, include snapshot
        diff.

        Args:
            action (Dict[str, Any]): Action dictionary handed to the session.

        Returns:
            Dict[str, str]: Always contains ``result``; contains ``snapshot``
                only when `ActionExecutor.should_update_snapshot` accepts the
                action and the captured diff reports structural changes.
        """
        result = await self._session.exec_action(action)

        # Only capture diff if action type typically changes DOM
        from .actions import ActionExecutor

        if not ActionExecutor.should_update_snapshot(action):
            return {"result": result}

        # Capture structural diff to previous snapshot
        diff = await self._session.get_snapshot(
            force_refresh=True, diff_only=True
        )

        # The snapshot layer reports "nothing changed" with this marker;
        # in that case the snapshot key is omitted.
        if diff.startswith("- Page Snapshot (no structural changes)"):
            return {"result": result}

        return {"result": result, "snapshot": diff}

    # ------------------------------------------------------------------
    # Optional PlaywrightLLMAgent helpers
    # ------------------------------------------------------------------
    def _ensure_agent(self) -> PlaywrightLLMAgent:
        r"""Create PlaywrightLLMAgent on first use if `web_agent_model`
        provided.

        Returns:
            PlaywrightLLMAgent: The cached agent instance.

        Raises:
            RuntimeError: If no `web_agent_model` was supplied.
        """
        if self.web_agent_model is None:
            raise RuntimeError(
                "web_agent_model not supplied - high-level task planning is "
                "unavailable."
            )

        if self._agent is None:
            self._agent = PlaywrightLLMAgent(
                headless=self._headless,
                user_data_dir=self._user_data_dir,
                model_backend=self.web_agent_model,
            )
        return self._agent

    async def solve_task(
        self, task_prompt: str, start_url: str, max_steps: int = 15
    ) -> str:
        r"""Use LLM agent to autonomously complete the task (requires
        `web_agent_model`).

        Args:
            task_prompt (str): Description of the task handed to the agent.
            start_url (str): URL the agent navigates to before starting.
            max_steps (int): Step limit passed to the agent.
                (default: :obj:`15`)

        Returns:
            str: Fixed completion message.

        Raises:
            RuntimeError: If no `web_agent_model` was supplied.
        """
        agent = self._ensure_agent()
        await agent.navigate(start_url)
        await agent.process_command(task_prompt, max_steps=max_steps)
        return "Task processing finished - see stdout for detailed trace."

    # ------------------------------------------------------------------
    # Toolkit registration
    # ------------------------------------------------------------------
    def get_tools(self) -> List[FunctionTool]:
        r"""Return the toolkit's methods wrapped as `FunctionTool` objects.

        Returns:
            List[FunctionTool]: The primitive browser tools, plus
                `solve_task` when a `web_agent_model` was supplied.
        """
        base_tools = [
            FunctionTool(self.open_browser),
            FunctionTool(self.close_browser),
            FunctionTool(self.visit_page),
            FunctionTool(self.get_page_snapshot),
            FunctionTool(self.click),
            FunctionTool(self.type),
            FunctionTool(self.select),
            FunctionTool(self.scroll),
            FunctionTool(self.wait),
            FunctionTool(self.extract),
            FunctionTool(self.enter),
        ]

        if self.web_agent_model is not None:
            base_tools.append(FunctionTool(self.solve_task))

        return base_tools
@@ -0,0 +1,175 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from __future__ import annotations
15
+
16
+ from pathlib import Path
17
+ from typing import TYPE_CHECKING, Any, Optional
18
+
19
+ from .actions import ActionExecutor
20
+ from .snapshot import PageSnapshot
21
+
22
+ if TYPE_CHECKING:
23
+ from playwright.async_api import (
24
+ Browser,
25
+ BrowserContext,
26
+ Page,
27
+ Playwright,
28
+ )
29
+
30
+
31
class NVBrowserSession:
    r"""Lightweight wrapper around Playwright for non-visual (headless)
    browsing.

    It provides a single *Page* instance plus helper utilities (snapshot &
    executor). Multiple toolkits or agents can reuse this class without
    duplicating Playwright setup code.

    Args:
        headless (bool): Passed to the Chromium launch call.
            (default: :obj:`True`)
        user_data_dir (Optional[str]): When set, the directory is created if
            missing and a persistent browser context is launched from it;
            otherwise a fresh browser and context are created.
            (default: :obj:`None`)
    """

    # Configuration constants (milliseconds)
    DEFAULT_NAVIGATION_TIMEOUT = 10000  # 10 seconds
    NETWORK_IDLE_TIMEOUT = 5000  # 5 seconds

    def __init__(
        self, *, headless: bool = True, user_data_dir: Optional[str] = None
    ):
        self._headless = headless
        self._user_data_dir = user_data_dir

        self._playwright: Optional[Playwright] = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        # A non-None page is the marker for "session fully initialised".
        self._page: Optional[Page] = None

        self.snapshot: Optional[PageSnapshot] = None
        self.executor: Optional[ActionExecutor] = None

    # ------------------------------------------------------------------
    # Browser lifecycle helpers
    # ------------------------------------------------------------------
    async def ensure_browser(self) -> None:
        r"""Start Playwright and open a page unless one is already open.

        If any step of the start-up fails, every resource acquired so far is
        released through :meth:`close` before the error is re-raised, so a
        later retry starts from a clean state instead of leaking the
        Playwright driver.
        """
        if self._page is not None:
            return

        from playwright.async_api import async_playwright

        try:
            pl = await async_playwright().start()
            # Each handle is stored as soon as it exists so that `close`
            # can release it if a later step raises.
            self._playwright = pl

            if self._user_data_dir:
                Path(self._user_data_dir).mkdir(parents=True, exist_ok=True)
                context = await pl.chromium.launch_persistent_context(
                    user_data_dir=self._user_data_dir,
                    headless=self._headless,
                )
                self._context = context
                self._browser = context.browser
            else:
                browser = await pl.chromium.launch(
                    headless=self._headless
                )
                self._browser = browser
                context = await browser.new_context()
                self._context = context

            # Reuse an already open page (persistent context may restore
            # last session)
            open_pages = context.pages
            if open_pages:
                page = open_pages[0]
            else:
                page = await context.new_page()

            # helpers
            self.snapshot = PageSnapshot(page)
            self.executor = ActionExecutor(page)
            # Published last: the early return above must never observe a
            # page without its helpers.
            self._page = page
        except BaseException:
            # Also covers task cancellation during start-up.
            await self.close()
            raise

    async def close(self) -> None:
        r"""Close all browser resources, ensuring cleanup even if some
        operations fail.

        Failures of individual close steps are collected and logged as a
        single warning instead of being raised.
        """
        # Detach every handle before the first await so that an overlapping
        # `close` call cannot close the same resource twice.
        context = self._context
        browser = self._browser
        playwright = self._playwright
        self._playwright = self._browser = self._context = self._page = None
        self.snapshot = self.executor = None

        errors: list[str] = []

        # Close context first (which closes pages)
        if context is not None:
            try:
                await context.close()
            except Exception as e:
                errors.append(f"Context close error: {e}")

        # Close browser
        if browser is not None:
            try:
                await browser.close()
            except Exception as e:
                errors.append(f"Browser close error: {e}")

        # Stop playwright
        if playwright is not None:
            try:
                await playwright.stop()
            except Exception as e:
                errors.append(f"Playwright stop error: {e}")

        # Log errors if any occurred during cleanup
        if errors:
            from camel.logger import get_logger

            logger = get_logger(__name__)
            logger.warning(
                "Errors during browser session cleanup: %s", "; ".join(errors)
            )

    # ------------------------------------------------------------------
    # Convenience wrappers around common actions
    # ------------------------------------------------------------------
    async def visit(self, url: str) -> str:
        r"""Navigate the page to ``url``.

        Args:
            url (str): The destination URL.

        Returns:
            str: ``"Visited <url>"`` on success, otherwise an
                ``"Error visiting <url>: ..."`` message. Navigation errors
                are reported through the return value, not raised.
        """
        await self.ensure_browser()
        page = self._page
        assert page is not None

        try:
            await page.goto(
                url,
                wait_until="domcontentloaded",
                timeout=self.DEFAULT_NAVIGATION_TIMEOUT,
            )
        except Exception as e:
            return f"Error visiting {url}: {e}"

        # Try to wait for network idle, but don't fail if it times out
        try:
            await page.wait_for_load_state(
                "networkidle", timeout=self.NETWORK_IDLE_TIMEOUT
            )
        except Exception:
            pass  # Network idle timeout is not critical
        return f"Visited {url}"

    async def get_snapshot(
        self, *, force_refresh: bool = False, diff_only: bool = False
    ) -> str:
        r"""Capture a page snapshot through the snapshot helper.

        Args:
            force_refresh (bool): Forwarded to the snapshot helper.
                (default: :obj:`False`)
            diff_only (bool): Forwarded to the snapshot helper.
                (default: :obj:`False`)

        Returns:
            str: The snapshot text produced by the helper.
        """
        await self.ensure_browser()
        assert self.snapshot is not None
        return await self.snapshot.capture(
            force_refresh=force_refresh, diff_only=diff_only
        )

    async def exec_action(self, action: dict[str, Any]) -> str:
        r"""Run ``action`` through the action executor.

        Args:
            action (dict[str, Any]): Action dictionary for the executor.

        Returns:
            str: The executor's result message.
        """
        await self.ensure_browser()
        assert self.executor is not None
        return await self.executor.execute(action)

    # Low-level accessors -------------------------------------------------
    async def get_page(self) -> "Page":
        r"""Return the session's page, starting the browser if needed."""
        await self.ensure_browser()
        assert self._page is not None
        return self._page