camel-ai 0.2.67__py3-none-any.whl → 0.2.69a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (43)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +170 -11
  3. camel/configs/vllm_config.py +2 -0
  4. camel/datagen/self_improving_cot.py +1 -1
  5. camel/environments/__init__.py +12 -0
  6. camel/environments/rlcards_env.py +860 -0
  7. camel/interpreters/docker/Dockerfile +2 -5
  8. camel/loaders/firecrawl_reader.py +4 -4
  9. camel/memories/blocks/vectordb_block.py +8 -1
  10. camel/memories/context_creators/score_based.py +185 -39
  11. camel/models/anthropic_model.py +114 -2
  12. camel/runtimes/configs.py +11 -11
  13. camel/runtimes/daytona_runtime.py +4 -4
  14. camel/runtimes/docker_runtime.py +6 -6
  15. camel/runtimes/remote_http_runtime.py +5 -5
  16. camel/societies/workforce/prompts.py +55 -21
  17. camel/societies/workforce/single_agent_worker.py +274 -14
  18. camel/societies/workforce/task_channel.py +9 -2
  19. camel/societies/workforce/utils.py +10 -2
  20. camel/societies/workforce/worker.py +74 -16
  21. camel/societies/workforce/workforce.py +90 -35
  22. camel/tasks/task.py +18 -12
  23. camel/toolkits/__init__.py +2 -0
  24. camel/toolkits/aci_toolkit.py +19 -19
  25. camel/toolkits/arxiv_toolkit.py +6 -6
  26. camel/toolkits/dappier_toolkit.py +5 -5
  27. camel/toolkits/file_write_toolkit.py +10 -10
  28. camel/toolkits/github_toolkit.py +3 -3
  29. camel/toolkits/non_visual_browser_toolkit/__init__.py +18 -0
  30. camel/toolkits/non_visual_browser_toolkit/actions.py +196 -0
  31. camel/toolkits/non_visual_browser_toolkit/agent.py +278 -0
  32. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +363 -0
  33. camel/toolkits/non_visual_browser_toolkit/nv_browser_session.py +175 -0
  34. camel/toolkits/non_visual_browser_toolkit/snapshot.js +188 -0
  35. camel/toolkits/non_visual_browser_toolkit/snapshot.py +164 -0
  36. camel/toolkits/pptx_toolkit.py +4 -4
  37. camel/toolkits/sympy_toolkit.py +1 -1
  38. camel/toolkits/task_planning_toolkit.py +3 -3
  39. camel/toolkits/thinking_toolkit.py +1 -1
  40. {camel_ai-0.2.67.dist-info → camel_ai-0.2.69a1.dist-info}/METADATA +2 -1
  41. {camel_ai-0.2.67.dist-info → camel_ai-0.2.69a1.dist-info}/RECORD +43 -35
  42. {camel_ai-0.2.67.dist-info → camel_ai-0.2.69a1.dist-info}/WHEEL +0 -0
  43. {camel_ai-0.2.67.dist-info → camel_ai-0.2.69a1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,196 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import asyncio
15
+ from typing import TYPE_CHECKING, Any, Dict
16
+
17
+ if TYPE_CHECKING:
18
+ from playwright.async_api import Page
19
+
20
+
21
class ActionExecutor:
    r"""Executes high-level actions (click, type …) on a Playwright Page.

    Every handler reports its outcome as a human-readable string instead of
    raising. Failure messages always start with ``"Error"`` so that callers
    can distinguish failures from successes with a single check.
    """

    # Configuration constants (milliseconds)
    DEFAULT_TIMEOUT = 5000  # 5 seconds
    SHORT_TIMEOUT = 2000  # 2 seconds

    def __init__(self, page: "Page"):
        r"""Bind the executor to the page all actions will run against.

        Args:
            page (Page): The Playwright page to operate on.
        """
        self.page = page

    # ------------------------------------------------------------------
    # Public helpers
    # ------------------------------------------------------------------
    async def execute(self, action: Dict[str, Any]) -> str:
        r"""Dispatch ``action`` to the handler matching its ``"type"``.

        Args:
            action (Dict[str, Any]): Action description. ``action["type"]``
                selects the handler; the remaining keys are handler specific.

        Returns:
            str: The handler's result message, or a message starting with
                ``"Error"`` when the action is invalid or the handler raised.
        """
        if not action:
            return "No action to execute"

        action_type = action.get("type")
        if not action_type:
            return "Error: action has no type"

        try:
            handler = {
                "click": self._click,
                "type": self._type,
                "select": self._select,
                "wait": self._wait,
                "extract": self._extract,
                "scroll": self._scroll,
                "enter": self._enter,
            }.get(action_type)

            # Reject unknown actions before paying for the DOM wait.
            if handler is None:
                return f"Error: Unknown action type '{action_type}'"

            # small helper to ensure basic stability
            await self._wait_dom_stable()

            return await handler(action)
        except Exception as exc:
            return f"Error executing {action_type}: {exc}"

    # ------------------------------------------------------------------
    # Internal handlers
    # ------------------------------------------------------------------
    async def _click(self, action: Dict[str, Any]) -> str:
        r"""Click the first element matched by selector, text or ref.

        The candidate selectors are tried in that order; the first one that
        matches at least one element and can be clicked wins.

        Args:
            action (Dict[str, Any]): May contain ``"selector"``, ``"text"``
                and/or ``"ref"``; at least one is required.

        Returns:
            str: Confirmation naming the selector used, or an ``"Error"``
                message (including the last failure reason, if any).
        """
        ref = action.get("ref")
        text = action.get("text")
        selector = action.get("selector")
        if not (ref or text or selector):
            return "Error: click requires ref/text/selector"

        strategies = []
        if selector:
            strategies.append(selector)
        if text:
            strategies.append(f'text="{text}"')
        if ref:
            strategies.append(f"[aria-ref='{ref}']")

        # Remember why the last attempt failed so the caller is not left
        # with an unexplained error.
        last_error = ""
        for sel in strategies:
            try:
                if await self.page.locator(sel).count() > 0:
                    await self.page.click(
                        sel, timeout=self.SHORT_TIMEOUT, force=True
                    )
                    return f"Clicked element via {sel}"
            except Exception as exc:
                last_error = str(exc)
        if last_error:
            return f"Error: Could not click element ({last_error})"
        return "Error: Could not click element"

    async def _type(self, action: Dict[str, Any]) -> str:
        r"""Fill the target element with ``action["text"]``.

        Args:
            action (Dict[str, Any]): Requires ``"selector"`` or ``"ref"``
                (selector takes precedence); ``"text"`` defaults to ``""``.

        Returns:
            str: Confirmation, or a message starting with ``"Error"``.
        """
        ref = action.get("ref")
        selector = action.get("selector")
        text = action.get("text", "")
        if not (ref or selector):
            return "Error: type requires ref/selector"
        target = selector or f"[aria-ref='{ref}']"
        try:
            await self.page.fill(target, text, timeout=self.SHORT_TIMEOUT)
            return f"Typed '{text}' into {target}"
        except Exception as exc:
            # Prefixed with "Error" like every other failure message.
            return f"Error: Type failed: {exc}"

    async def _select(self, action: Dict[str, Any]) -> str:
        r"""Select ``action["value"]`` in the target ``<select>`` element.

        Args:
            action (Dict[str, Any]): Requires ``"selector"`` or ``"ref"``
                (selector takes precedence); ``"value"`` defaults to ``""``.

        Returns:
            str: Confirmation, or a message starting with ``"Error"``.
        """
        ref = action.get("ref")
        selector = action.get("selector")
        value = action.get("value", "")
        if not (ref or selector):
            return "Error: select requires ref/selector"
        target = selector or f"[aria-ref='{ref}']"
        try:
            await self.page.select_option(
                target, value, timeout=self.DEFAULT_TIMEOUT
            )
            return f"Selected '{value}' in {target}"
        except Exception as exc:
            # Prefixed with "Error" like every other failure message.
            return f"Error: Select failed: {exc}"

    async def _wait(self, action: Dict[str, Any]) -> str:
        r"""Sleep for ``"timeout"`` milliseconds or wait for ``"selector"``.

        ``"timeout"`` takes precedence when both keys are present.

        Args:
            action (Dict[str, Any]): Contains ``"timeout"`` (milliseconds)
                or ``"selector"``.

        Returns:
            str: Confirmation, or an ``"Error"`` message if neither key is
                present.
        """
        if "timeout" in action:
            ms = action["timeout"]
            await asyncio.sleep(ms / 1000)
            return f"Waited {ms}ms"
        if "selector" in action:
            sel = action["selector"]
            await self.page.wait_for_selector(
                sel, timeout=self.DEFAULT_TIMEOUT
            )
            return f"Waited for {sel}"
        return "Error: wait requires timeout/selector"

    async def _extract(self, action: Dict[str, Any]) -> str:
        r"""Return up to the first 100 characters of an element's text.

        Args:
            action (Dict[str, Any]): Requires ``"ref"``.

        Returns:
            str: ``"Extracted: <text>"`` (``"None"`` when the element has no
                text content), or an ``"Error"`` message if ``"ref"`` is
                missing.
        """
        ref = action.get("ref")
        if not ref:
            return "Error: extract requires ref"
        target = f"[aria-ref='{ref}']"
        await self.page.wait_for_selector(target, timeout=self.DEFAULT_TIMEOUT)
        txt = await self.page.text_content(target)
        return f"Extracted: {txt[:100] if txt else 'None'}"

    async def _scroll(self, action: Dict[str, Any]) -> str:
        r"""Scroll the window vertically.

        Args:
            action (Dict[str, Any]): ``"direction"`` is ``"up"`` or
                ``"down"`` (default ``"down"``); ``"amount"`` is the pixel
                count (default 300), clamped to ``[-5000, 5000]``.

        Returns:
            str: Confirmation, or an ``"Error"`` message on invalid input.
        """
        direction = action.get("direction", "down")
        amount = action.get("amount", 300)

        # Validate inputs to prevent injection
        if direction not in ("up", "down"):
            return "Error: direction must be 'up' or 'down'"

        try:
            # Safely convert amount to integer and clamp to reasonable range
            amount_int = int(amount)
            amount_int = max(
                -5000, min(5000, amount_int)
            )  # Clamp between -5000 and 5000
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers int(float("inf")).
            return "Error: amount must be a valid number"

        # Pass the offset as a bound argument rather than splicing it into
        # the JavaScript source.
        scroll_offset = amount_int if direction == "down" else -amount_int
        await self.page.evaluate(
            "(offset) => window.scrollBy(0, offset)", scroll_offset
        )
        await asyncio.sleep(0.5)
        return f"Scrolled {direction} by {abs(amount_int)}px"

    async def _enter(self, action: Dict[str, Any]) -> str:
        r"""Press Enter, optionally focusing an element first.

        Args:
            action (Dict[str, Any]): May contain ``"ref"`` or ``"selector"``
                (ref takes precedence) naming the element to focus.

        Returns:
            str: ``"Pressed Enter"``.
        """
        ref = action.get("ref")
        selector = action.get("selector")
        if ref:
            await self.page.focus(f"[aria-ref='{ref}']")
        elif selector:
            await self.page.focus(selector)
        await self.page.keyboard.press("Enter")
        await asyncio.sleep(0.3)
        return "Pressed Enter"

    # utilities
    async def _wait_dom_stable(self) -> None:
        r"""Best-effort wait for the ``domcontentloaded`` load state.

        Any failure (including a timeout) is deliberately ignored so that
        the pending action is still attempted.
        """
        try:
            await self.page.wait_for_load_state(
                'domcontentloaded', timeout=self.SHORT_TIMEOUT
            )
        except Exception:
            pass

    # static helpers
    @staticmethod
    def should_update_snapshot(action: Dict[str, Any]) -> bool:
        r"""Tell whether ``action`` is of a type that may change the page.

        Args:
            action (Dict[str, Any]): Action description with a ``"type"``.

        Returns:
            bool: True for click, type, select, scroll, navigate and enter.
        """
        change_types = {
            "click",
            "type",
            "select",
            "scroll",
            "navigate",
            "enter",
        }
        return action.get("type") in change_types
@@ -0,0 +1,278 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import json
15
+ import logging
16
+ import re
17
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
18
+
19
+ from camel.models import BaseModelBackend, ModelFactory
20
+ from camel.types import ModelPlatformType, ModelType
21
+
22
+ from .actions import ActionExecutor
23
+ from .nv_browser_session import NVBrowserSession
24
+
25
+ if TYPE_CHECKING:
26
+ from camel.agents import ChatAgent
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class PlaywrightLLMAgent:
    r"""High-level orchestration: snapshot ↔ LLM ↔ action executor.

    The agent repeatedly sends the current page snapshot (plus the history
    of executed actions) to an LLM, parses the JSON action it returns and
    runs that action in the browser session until the LLM answers with a
    ``finish`` action or the step budget is exhausted.
    """

    # System prompt as class constant to avoid recreation
    SYSTEM_PROMPT = """
You are a web automation assistant.

Analyse the page snapshot and create a short high-level plan,
then output the FIRST action to start with.

Return a JSON object in *exactly* this shape:
Action format json_object examples:
{
  "plan": ["Step 1", "Step 2"],
  "action": {
    "type": "click",
    "ref": "e1"
  }
}

If task is already complete:
{
  "plan": [],
  "action": {
    "type": "finish",
    "ref": null,
    "summary": "Task was already completed. Summary of what was found..."
  }
}

Available action types:
- 'click': {"type": "click", "ref": "e1"} or {"type": "click", "text":
"Button Text"} or {"type": "click", "selector": "button"}
- 'type': {"type": "type", "ref": "e1", "text": "search text"} or {"type":
"type", "selector": "input", "text": "search text"}
- 'select': {"type": "select", "ref": "e1", "value": "option"} or {"type":
"select", "selector": "select", "value": "option"}
- 'wait': {"type": "wait", "timeout": 2000} or {"type": "wait", "selector":
"#element"}
- 'scroll': {"type": "scroll", "direction": "down", "amount": 300}
- 'enter': {"type": "enter", "ref": "e1"} or {"type": "enter", "selector":
"input[name=q]"} or {"type": "enter"}
- 'navigate': {"type": "navigate", "url": "https://example.com"}
- 'finish': {"type": "finish", "ref": null, "summary": "task completion
summary"}

IMPORTANT:
- For 'click': Use 'ref' from snapshot, or 'text' for visible text,
or 'selector' for CSS selectors
- For 'type'/'select': Use 'ref' from snapshot or 'selector' for CSS selectors
- Only use 'ref' values that exist in the snapshot (e.g., ref=e1, ref=e2, etc.)
- Use 'finish' when the task is completed successfully with a summary of
what was accomplished
- Use 'enter' to press the Enter key (optionally focus an element first)
- Use 'navigate' to open a new URL before interacting further
- click can choose radio, checkbox...
"""

    # Prefixes that mark an action result as a failure. "Type failed" and
    # "Select failed" are kept for executors that do not prefix every
    # failure with "Error".
    _FAILURE_PREFIXES = ("Error", "Type failed", "Select failed")

    def __init__(
        self,
        *,
        user_data_dir: Optional[str] = None,
        headless: bool = False,
        model_backend: Optional["BaseModelBackend"] = None,
    ):
        r"""Create the browser session and pick the model backend.

        Args:
            user_data_dir (Optional[str]): Forwarded to the browser session.
                (default: :obj:`None`)
            headless (bool): Forwarded to the browser session.
                (default: :obj:`False`)
            model_backend (Optional[BaseModelBackend]): Model used for
                planning. When omitted, an OpenAI GPT-4o-mini backend with
                ``temperature=0`` and ``top_p=1`` is created.
                (default: :obj:`None`)
        """
        self._session = NVBrowserSession(
            headless=headless, user_data_dir=user_data_dir
        )

        # One entry per executed action, with the keys "action", "result"
        # and "success" (appended by process_command).
        self.action_history: List[Dict[str, Any]] = []
        if model_backend is None:
            model_backend = ModelFactory.create(
                model_platform=ModelPlatformType.OPENAI,
                model_type=ModelType.GPT_4O_MINI,
                model_config_dict={"temperature": 0, "top_p": 1},
            )
        self.model_backend = model_backend
        # Reuse ChatAgent instance to avoid recreation overhead; created
        # on first use by _get_chat_agent().
        self._chat_agent: Optional["ChatAgent"] = None

    async def navigate(self, url: str) -> str:
        r"""Open ``url`` and return a freshly captured snapshot.

        Args:
            url (str): Address to visit.

        Returns:
            str: The page snapshot, or a message starting with ``"Error"``
                if visiting the page or taking the snapshot raised.
        """
        try:
            # NVBrowserSession handles waits internally
            await self._session.visit(url)
            # Log only once the visit has actually succeeded.
            logger.debug("Navigated to URL: %s", url)
            return await self._session.get_snapshot(force_refresh=True)
        except Exception as exc:
            return f"Error: could not navigate - {exc}"

    def _get_chat_agent(self) -> "ChatAgent":
        r"""Get or create the ChatAgent instance."""
        # Imported lazily, at call time rather than at module import.
        from camel.agents import ChatAgent

        if self._chat_agent is None:
            self._chat_agent = ChatAgent(
                system_message=self.SYSTEM_PROMPT, model=self.model_backend
            )
        return self._chat_agent

    @classmethod
    def _is_failure(cls, result: str) -> bool:
        r"""Tell whether an action result string reports a failure.

        Only the start of the message is inspected, so page content that
        merely contains the word "Error" (snapshots, extracted or typed
        text) is not mistaken for a failure.
        """
        return isinstance(result, str) and result.startswith(
            cls._FAILURE_PREFIXES
        )

    def _safe_parse_json(self, content: str) -> Dict[str, Any]:
        r"""Extract the first JSON object from an LLM response.

        The whole response is tried first. If it is not a JSON object, the
        text is scanned for the first ``{`` from which a complete object can
        be decoded; this copes with code fences, surrounding prose,
        arbitrary nesting depth and braces inside string values.

        Args:
            content (str): Raw LLM response text.

        Returns:
            Dict[str, Any]: The parsed object, or a default ``finish``
                action with a parsing-error summary if none could be found.
        """
        # First attempt: direct parsing (only a JSON object is acceptable)
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        # Second attempt: decode an object starting at each opening brace.
        # raw_decode stops at the end of the object, so trailing text is
        # ignored.
        decoder = json.JSONDecoder()
        start = content.find('{')
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(content, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            start = content.find('{', start + 1)

        # Fallback: return default structure
        logger.warning(
            "Could not parse JSON from LLM response: %s", content[:200]
        )
        return {
            "plan": ["Could not parse response"],
            "action": {
                "type": "finish",
                "ref": None,
                "summary": "Parsing error",
            },
        }

    def _llm_call(
        self,
        prompt: str,
        snapshot: str,
        is_initial: bool,
        history: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action.

        Args:
            prompt (str): The user's task description.
            snapshot (str): Current page snapshot text.
            is_initial (bool): True for the first call of a task; the
                action history is only included on follow-up calls.
            history (Optional[List[Dict[str, Any]]]): Executed actions,
                each with "action", "result" and "success" keys.
                (default: :obj:`None`)

        Returns:
            Dict[str, Any]: The parsed LLM response.
        """
        # Build user message
        if is_initial:
            user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
        else:
            hist_lines = []
            for i, h in enumerate(history or []):
                # .get() keeps a malformed entry (e.g. an action without
                # "type") from aborting the whole call.
                past_action = h.get("action") or {}
                mark = '✅' if h.get("success") else '❌'
                hist_lines.append(
                    f"{i + 1}. {mark} "
                    f"{past_action.get('type')} -> {h.get('result')}"
                )
            user_content = (
                f"Snapshot:\n{snapshot}\n\nHistory:\n"
                + "\n".join(hist_lines)
                + f"\n\nTask: {prompt}"
            )

        # Run ChatAgent
        chat_agent = self._get_chat_agent()
        response = chat_agent.step(user_content)
        content = response.msgs[0].content if response.msgs else "{}"

        # Safely parse JSON response
        return self._safe_parse_json(content)

    async def process_command(self, prompt: str, max_steps: int = 15):
        r"""Plan and execute browser actions until the task is finished.

        Args:
            prompt (str): The user's task description.
            max_steps (int): Maximum number of actions to execute.
                (default: :obj:`15`)
        """
        # initial full snapshot
        full_snapshot = await self._session.get_snapshot()
        assert self._session.snapshot is not None
        meta = self._session.snapshot.last_info
        logger.info("Initial snapshot priorities=%s", meta["priorities"])
        logger.debug("Full snapshot:\n%s", full_snapshot)

        plan_resp = self._llm_call(
            prompt, full_snapshot or "", is_initial=True
        )
        plan = plan_resp.get("plan", [])
        action = plan_resp.get("action")

        logger.info("Plan generated: %s", json.dumps(plan, ensure_ascii=False))

        steps = 0
        while action and steps < max_steps:
            # The LLM may answer with something other than an object for
            # "action"; stop cleanly instead of crashing on .get().
            if not isinstance(action, dict):
                logger.warning("Ignoring malformed action: %r", action)
                break

            if action.get("type") == "finish":
                logger.info("Task finished: %s", action.get("summary", "Done"))
                break

            result = await self._run_action(action)
            logger.debug("Executed action: %s | Result: %s", action, result)

            self.action_history.append(
                {
                    "action": action,
                    "result": result,
                    "success": not self._is_failure(result),
                }
            )

            diff_snapshot = await self._session.get_snapshot(
                force_refresh=ActionExecutor.should_update_snapshot(action),
                diff_only=True,
            )
            assert self._session.snapshot is not None
            meta = self._session.snapshot.last_info
            logger.debug(
                "Snapshot after action (diff=%s):\n%s",
                meta["is_diff"],
                diff_snapshot,
            )

            # Update full snapshot if page changed
            if meta["is_diff"] and not diff_snapshot.startswith(
                "- Page Snapshot (no structural changes)"
            ):
                full_snapshot = self._session.snapshot.snapshot_data or ""

            action = self._llm_call(
                prompt,
                full_snapshot or "",
                is_initial=False,
                history=self.action_history,
            ).get("action")
            steps += 1

        logger.info("Process completed with %d steps", steps)

    async def _run_action(self, action: Dict[str, Any]) -> str:
        r"""Run one action: ``navigate`` here, everything else in the session.

        Args:
            action (Dict[str, Any]): Action description from the LLM.

        Returns:
            str: The result message (or, for a successful ``navigate``, the
                new page snapshot).
        """
        if action.get("type") == "navigate":
            return await self.navigate(action.get("url", ""))
        return await self._session.exec_action(action)

    async def close(self):
        r"""Close the underlying browser session."""
        await self._session.close()