camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/_utils.py +38 -0
  4. camel/agents/chat_agent.py +4014 -410
  5. camel/agents/mcp_agent.py +30 -27
  6. camel/agents/repo_agent.py +2 -1
  7. camel/benchmarks/browsecomp.py +6 -6
  8. camel/configs/__init__.py +15 -0
  9. camel/configs/aihubmix_config.py +88 -0
  10. camel/configs/amd_config.py +70 -0
  11. camel/configs/cometapi_config.py +104 -0
  12. camel/configs/minimax_config.py +93 -0
  13. camel/configs/nebius_config.py +103 -0
  14. camel/configs/vllm_config.py +2 -0
  15. camel/data_collectors/alpaca_collector.py +15 -6
  16. camel/datagen/self_improving_cot.py +1 -1
  17. camel/datasets/base_generator.py +39 -10
  18. camel/environments/__init__.py +12 -0
  19. camel/environments/rlcards_env.py +860 -0
  20. camel/environments/single_step.py +28 -3
  21. camel/environments/tic_tac_toe.py +1 -1
  22. camel/interpreters/__init__.py +2 -0
  23. camel/interpreters/docker/Dockerfile +4 -16
  24. camel/interpreters/docker_interpreter.py +3 -2
  25. camel/interpreters/e2b_interpreter.py +34 -1
  26. camel/interpreters/internal_python_interpreter.py +51 -2
  27. camel/interpreters/microsandbox_interpreter.py +395 -0
  28. camel/loaders/__init__.py +11 -2
  29. camel/loaders/base_loader.py +85 -0
  30. camel/loaders/chunkr_reader.py +9 -0
  31. camel/loaders/firecrawl_reader.py +4 -4
  32. camel/logger.py +1 -1
  33. camel/memories/agent_memories.py +84 -1
  34. camel/memories/base.py +34 -0
  35. camel/memories/blocks/chat_history_block.py +122 -4
  36. camel/memories/blocks/vectordb_block.py +8 -1
  37. camel/memories/context_creators/score_based.py +29 -237
  38. camel/memories/records.py +88 -8
  39. camel/messages/base.py +166 -40
  40. camel/messages/func_message.py +32 -5
  41. camel/models/__init__.py +10 -0
  42. camel/models/aihubmix_model.py +83 -0
  43. camel/models/aiml_model.py +1 -16
  44. camel/models/amd_model.py +101 -0
  45. camel/models/anthropic_model.py +117 -18
  46. camel/models/aws_bedrock_model.py +2 -33
  47. camel/models/azure_openai_model.py +205 -91
  48. camel/models/base_audio_model.py +3 -1
  49. camel/models/base_model.py +189 -24
  50. camel/models/cohere_model.py +5 -17
  51. camel/models/cometapi_model.py +83 -0
  52. camel/models/crynux_model.py +1 -16
  53. camel/models/deepseek_model.py +6 -16
  54. camel/models/fish_audio_model.py +6 -0
  55. camel/models/gemini_model.py +71 -20
  56. camel/models/groq_model.py +1 -17
  57. camel/models/internlm_model.py +1 -16
  58. camel/models/litellm_model.py +49 -32
  59. camel/models/lmstudio_model.py +1 -17
  60. camel/models/minimax_model.py +83 -0
  61. camel/models/mistral_model.py +1 -16
  62. camel/models/model_factory.py +27 -1
  63. camel/models/model_manager.py +24 -6
  64. camel/models/modelscope_model.py +1 -16
  65. camel/models/moonshot_model.py +185 -19
  66. camel/models/nebius_model.py +83 -0
  67. camel/models/nemotron_model.py +0 -5
  68. camel/models/netmind_model.py +1 -16
  69. camel/models/novita_model.py +1 -16
  70. camel/models/nvidia_model.py +1 -16
  71. camel/models/ollama_model.py +4 -19
  72. camel/models/openai_compatible_model.py +171 -46
  73. camel/models/openai_model.py +205 -77
  74. camel/models/openrouter_model.py +1 -17
  75. camel/models/ppio_model.py +1 -16
  76. camel/models/qianfan_model.py +1 -16
  77. camel/models/qwen_model.py +1 -16
  78. camel/models/reka_model.py +1 -16
  79. camel/models/samba_model.py +34 -47
  80. camel/models/sglang_model.py +64 -31
  81. camel/models/siliconflow_model.py +1 -16
  82. camel/models/stub_model.py +0 -4
  83. camel/models/togetherai_model.py +1 -16
  84. camel/models/vllm_model.py +1 -16
  85. camel/models/volcano_model.py +0 -17
  86. camel/models/watsonx_model.py +1 -16
  87. camel/models/yi_model.py +1 -16
  88. camel/models/zhipuai_model.py +60 -16
  89. camel/parsers/__init__.py +18 -0
  90. camel/parsers/mcp_tool_call_parser.py +176 -0
  91. camel/retrievers/auto_retriever.py +1 -0
  92. camel/runtimes/configs.py +11 -11
  93. camel/runtimes/daytona_runtime.py +15 -16
  94. camel/runtimes/docker_runtime.py +6 -6
  95. camel/runtimes/remote_http_runtime.py +5 -5
  96. camel/services/agent_openapi_server.py +380 -0
  97. camel/societies/__init__.py +2 -0
  98. camel/societies/role_playing.py +26 -28
  99. camel/societies/workforce/__init__.py +2 -0
  100. camel/societies/workforce/events.py +122 -0
  101. camel/societies/workforce/prompts.py +249 -38
  102. camel/societies/workforce/role_playing_worker.py +82 -20
  103. camel/societies/workforce/single_agent_worker.py +634 -34
  104. camel/societies/workforce/structured_output_handler.py +512 -0
  105. camel/societies/workforce/task_channel.py +169 -23
  106. camel/societies/workforce/utils.py +176 -9
  107. camel/societies/workforce/worker.py +77 -23
  108. camel/societies/workforce/workflow_memory_manager.py +772 -0
  109. camel/societies/workforce/workforce.py +3168 -478
  110. camel/societies/workforce/workforce_callback.py +74 -0
  111. camel/societies/workforce/workforce_logger.py +203 -175
  112. camel/societies/workforce/workforce_metrics.py +33 -0
  113. camel/storages/__init__.py +4 -0
  114. camel/storages/key_value_storages/json.py +15 -2
  115. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  116. camel/storages/object_storages/google_cloud.py +1 -1
  117. camel/storages/vectordb_storages/__init__.py +6 -0
  118. camel/storages/vectordb_storages/chroma.py +731 -0
  119. camel/storages/vectordb_storages/oceanbase.py +13 -13
  120. camel/storages/vectordb_storages/pgvector.py +349 -0
  121. camel/storages/vectordb_storages/qdrant.py +3 -3
  122. camel/storages/vectordb_storages/surreal.py +365 -0
  123. camel/storages/vectordb_storages/tidb.py +8 -6
  124. camel/tasks/task.py +244 -27
  125. camel/toolkits/__init__.py +46 -8
  126. camel/toolkits/aci_toolkit.py +64 -19
  127. camel/toolkits/arxiv_toolkit.py +6 -6
  128. camel/toolkits/base.py +63 -5
  129. camel/toolkits/code_execution.py +28 -1
  130. camel/toolkits/context_summarizer_toolkit.py +684 -0
  131. camel/toolkits/craw4ai_toolkit.py +93 -0
  132. camel/toolkits/dappier_toolkit.py +10 -6
  133. camel/toolkits/dingtalk.py +1135 -0
  134. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  135. camel/toolkits/excel_toolkit.py +901 -67
  136. camel/toolkits/file_toolkit.py +1402 -0
  137. camel/toolkits/function_tool.py +30 -6
  138. camel/toolkits/github_toolkit.py +107 -20
  139. camel/toolkits/gmail_toolkit.py +1839 -0
  140. camel/toolkits/google_calendar_toolkit.py +38 -4
  141. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  142. camel/toolkits/human_toolkit.py +34 -10
  143. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  144. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  145. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  146. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  147. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  148. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
  149. camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
  150. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  151. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
  152. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  153. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
  154. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  155. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  156. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  157. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  158. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
  159. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  160. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  161. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
  162. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  163. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  164. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  165. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  166. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  167. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  168. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  169. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  170. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  171. camel/toolkits/image_generation_toolkit.py +390 -0
  172. camel/toolkits/jina_reranker_toolkit.py +3 -4
  173. camel/toolkits/klavis_toolkit.py +5 -1
  174. camel/toolkits/markitdown_toolkit.py +104 -0
  175. camel/toolkits/math_toolkit.py +64 -10
  176. camel/toolkits/mcp_toolkit.py +370 -45
  177. camel/toolkits/memory_toolkit.py +5 -1
  178. camel/toolkits/message_agent_toolkit.py +608 -0
  179. camel/toolkits/message_integration.py +724 -0
  180. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  181. camel/toolkits/note_taking_toolkit.py +277 -0
  182. camel/toolkits/notion_mcp_toolkit.py +224 -0
  183. camel/toolkits/openbb_toolkit.py +5 -1
  184. camel/toolkits/origene_mcp_toolkit.py +56 -0
  185. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  186. camel/toolkits/pptx_toolkit.py +25 -12
  187. camel/toolkits/resend_toolkit.py +168 -0
  188. camel/toolkits/screenshot_toolkit.py +213 -0
  189. camel/toolkits/search_toolkit.py +437 -142
  190. camel/toolkits/slack_toolkit.py +104 -50
  191. camel/toolkits/sympy_toolkit.py +1 -1
  192. camel/toolkits/task_planning_toolkit.py +3 -3
  193. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  194. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  195. camel/toolkits/terminal_toolkit/utils.py +532 -0
  196. camel/toolkits/thinking_toolkit.py +1 -1
  197. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  198. camel/toolkits/video_analysis_toolkit.py +106 -26
  199. camel/toolkits/video_download_toolkit.py +17 -14
  200. camel/toolkits/web_deploy_toolkit.py +1219 -0
  201. camel/toolkits/wechat_official_toolkit.py +483 -0
  202. camel/toolkits/zapier_toolkit.py +5 -1
  203. camel/types/__init__.py +2 -2
  204. camel/types/agents/tool_calling_record.py +4 -1
  205. camel/types/enums.py +316 -40
  206. camel/types/openai_types.py +2 -2
  207. camel/types/unified_model_type.py +31 -4
  208. camel/utils/commons.py +36 -5
  209. camel/utils/constants.py +3 -0
  210. camel/utils/context_utils.py +1003 -0
  211. camel/utils/mcp.py +138 -4
  212. camel/utils/mcp_client.py +45 -1
  213. camel/utils/message_summarizer.py +148 -0
  214. camel/utils/token_counting.py +43 -20
  215. camel/utils/tool_result.py +44 -0
  216. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
  217. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
  218. camel/loaders/pandas_reader.py +0 -368
  219. camel/toolkits/dalle_toolkit.py +0 -175
  220. camel/toolkits/file_write_toolkit.py +0 -444
  221. camel/toolkits/openai_agent_toolkit.py +0 -135
  222. camel/toolkits/terminal_toolkit.py +0 -1037
  223. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  224. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,311 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import json
15
+ import re
16
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
17
+
18
+ from camel.logger import get_logger
19
+ from camel.models import BaseModelBackend, ModelFactory
20
+ from camel.types import ModelPlatformType, ModelType
21
+
22
+ from .actions import ActionExecutor
23
+ from .browser_session import HybridBrowserSession
24
+
25
+ if TYPE_CHECKING:
26
+ from camel.agents import ChatAgent
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
+ class PlaywrightLLMAgent:
32
+ r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
33
+
34
+ # System prompt as class constant to avoid recreation
35
+ SYSTEM_PROMPT = """
36
+ You are a web automation assistant.
37
+
38
+ " Analyse the page snapshot and create a short high-level plan, "
39
+ "then output the FIRST action to start with.\n\n"
40
+ "Return a JSON object in *exactly* this shape:\n"
41
+ "Action format json_object examples:\n"
42
+ "{\n \"plan\": [\"Step 1\", \"Step 2\"],\n \"action\": {\n \"type\":
43
+ \"click\",\n \"ref\": \"e1\"\n }\n}\n\n"
44
+ "If task is already complete:\n"
45
+ "{\n \"plan\": [],\n \"action\": {\n \"type\": \"finish\",
46
+ \n \"ref\": null,\n \"summary\": \"Task was already completed. Summary
47
+ of what was found...\"\n }\n}"
48
+
49
+ Available action types:
50
+ - 'click': {"type": "click", "ref": "e1"} or {"type": "click", "text":
51
+ "Button Text"} or {"type": "click", "selector": "button"}
52
+ - 'type': {"type": "type", "ref": "e1", "text": "search text"} or {"type":
53
+ "type", "selector": "input", "text": "search text"}
54
+ - 'select': {"type": "select", "ref": "e1", "value": "option"} or {"type":
55
+ "select", "selector": "select", "value": "option"}
56
+ - 'wait': {"type": "wait", "timeout": 2000} or {"type": "wait", "selector":
57
+ "#element"}
58
+ - 'scroll': {"type": "scroll", "direction": "down", "amount": 300}
59
+ - 'enter': {"type": "enter", "ref": "e1"} or {"type": "enter", "selector":
60
+ "input[name=q]"} or {"type": "enter"}
61
+ - 'navigate': {"type": "navigate", "url": "https://example.com"}
62
+ - 'finish': {"type": "finish", "ref": null, "summary": "task completion
63
+ summary"}
64
+
65
+ IMPORTANT:
66
+ - For 'click': Use 'ref' from snapshot, or 'text' for visible text,
67
+ or 'selector' for CSS selectors
68
+ - For 'type'/'select': Use 'ref' from snapshot or 'selector' for CSS selectors
69
+ - Only use 'ref' values that exist in the snapshot (e.g., ref=e1, ref=e2, etc.)
70
+ - Use 'finish' when the task is completed successfully with a summary of
71
+ what was accomplished
72
+ - Use 'enter' to press the Enter key (optionally focus an element first)
73
+ - Use 'navigate' to open a new URL before interacting further
74
+ - click can choose radio, checkbox...
75
+ """
76
+
77
+ def __init__(
78
+ self,
79
+ *,
80
+ user_data_dir: Optional[str] = None,
81
+ headless: bool = False,
82
+ stealth: bool = False,
83
+ model_backend: Optional[BaseModelBackend] = None,
84
+ default_timeout: Optional[int] = None,
85
+ short_timeout: Optional[int] = None,
86
+ ):
87
+ self._session = HybridBrowserSession(
88
+ headless=headless,
89
+ user_data_dir=user_data_dir,
90
+ stealth=stealth,
91
+ default_timeout=default_timeout,
92
+ short_timeout=short_timeout,
93
+ )
94
+ from camel.agents import ChatAgent
95
+
96
+ # Populated lazily after first page load
97
+ self.action_history: List[Dict[str, Any]] = []
98
+ if model_backend is None:
99
+ model_backend = ModelFactory.create(
100
+ model_platform=ModelPlatformType.DEFAULT,
101
+ model_type=ModelType.DEFAULT,
102
+ model_config_dict={"temperature": 0, "top_p": 1},
103
+ )
104
+ self.model_backend = model_backend
105
+ # Reuse ChatAgent instance to avoid recreation overhead
106
+ self._chat_agent: Optional[ChatAgent] = None
107
+
108
+ async def navigate(self, url: str) -> str:
109
+ r"""Navigate to a URL and return the snapshot."""
110
+ try:
111
+ # HybridBrowserSession handles waits internally
112
+ logger.debug("Navigated to URL: %s", url)
113
+ await self._session.visit(url)
114
+ return await self._session.get_snapshot(force_refresh=True)
115
+ except Exception as exc:
116
+ error_msg = f"Error: could not navigate to {url} - {exc}"
117
+ logger.error(error_msg)
118
+ return error_msg
119
+
120
+ def _get_chat_agent(self) -> "ChatAgent":
121
+ r"""Get or create the ChatAgent instance."""
122
+ from camel.agents import ChatAgent
123
+
124
+ if self._chat_agent is None:
125
+ self._chat_agent = ChatAgent(
126
+ system_message=self.SYSTEM_PROMPT, model=self.model_backend
127
+ )
128
+ return self._chat_agent
129
+
130
+ def _safe_parse_json(self, content: str) -> Dict[str, Any]:
131
+ r"""Safely parse JSON from LLM response with multiple fallback
132
+ strategies.
133
+ """
134
+ # First attempt: direct parsing
135
+ try:
136
+ return json.loads(content)
137
+ except json.JSONDecodeError:
138
+ pass
139
+
140
+ # Second attempt: extract JSON-like block using regex
141
+ # Look for content between outermost braces
142
+ json_pattern = re.compile(
143
+ r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', re.DOTALL
144
+ )
145
+ json_matches = json_pattern.findall(content)
146
+
147
+ for match in json_matches:
148
+ try:
149
+ return json.loads(match)
150
+ except json.JSONDecodeError:
151
+ continue
152
+
153
+ # Third attempt: try to find and parse line by line
154
+ lines = content.split('\n')
155
+ json_lines = []
156
+ in_json = False
157
+
158
+ for line in lines:
159
+ line = line.strip()
160
+ if line.startswith('{'):
161
+ in_json = True
162
+ json_lines = [line]
163
+ elif in_json:
164
+ json_lines.append(line)
165
+ if line.endswith('}'):
166
+ try:
167
+ json_text = '\n'.join(json_lines)
168
+ return json.loads(json_text)
169
+ except json.JSONDecodeError:
170
+ pass
171
+ in_json = False
172
+ json_lines = []
173
+
174
+ # Fallback: return default structure
175
+ logger.warning(
176
+ "Could not parse JSON from LLM response: %s", content[:200]
177
+ )
178
+ return self._get_fallback_response("Parsing error")
179
+
180
+ def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
181
+ r"""Generate a fallback response structure."""
182
+ return {
183
+ "plan": [f"Could not parse response: {error_msg}"],
184
+ "action": {
185
+ "type": "finish",
186
+ "ref": None,
187
+ "summary": f"Parsing error: {error_msg}",
188
+ },
189
+ }
190
+
191
+ def _llm_call(
192
+ self,
193
+ prompt: str,
194
+ snapshot: str,
195
+ is_initial: bool,
196
+ history: Optional[List[Dict[str, Any]]] = None,
197
+ ) -> Dict[str, Any]:
198
+ r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
199
+ # Build user message
200
+ if is_initial:
201
+ user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
202
+ else:
203
+ hist_lines = [
204
+ (
205
+ f"{i + 1}. {'✅' if h['success'] else '❌'} "
206
+ f"{h['action']['type']} -> {h['result']}"
207
+ )
208
+ for i, h in enumerate(history or [])
209
+ ]
210
+ user_content = (
211
+ f"Snapshot:\n{snapshot}\n\nHistory:\n"
212
+ + "\n".join(hist_lines)
213
+ + f"\n\nTask: {prompt}"
214
+ )
215
+
216
+ # Run ChatAgent
217
+ chat_agent = self._get_chat_agent()
218
+ response = chat_agent.step(user_content)
219
+ content = response.msgs[0].content if response.msgs else "{}"
220
+
221
+ # Safely parse JSON response
222
+ return self._safe_parse_json(content)
223
+
224
+ async def process_command(self, prompt: str, max_steps: int = 15):
225
+ r"""Process a command using LLM-guided browser automation."""
226
+ # initial full snapshot
227
+ full_snapshot = await self._session.get_snapshot()
228
+ assert self._session.snapshot is not None
229
+ meta = self._session.snapshot.last_info
230
+ logger.info("Initial snapshot priorities=%s", meta["priorities"])
231
+ logger.debug("Full snapshot:\n%s", full_snapshot)
232
+
233
+ plan_resp = self._llm_call(
234
+ prompt, full_snapshot or "", is_initial=True
235
+ )
236
+ plan = plan_resp.get("plan", [])
237
+ action = plan_resp.get("action")
238
+
239
+ logger.info("Plan generated: %s", json.dumps(plan, ensure_ascii=False))
240
+
241
+ steps = 0
242
+ while action and steps < max_steps:
243
+ if action.get("type") == "finish":
244
+ logger.info("Task finished: %s", action.get("summary", "Done"))
245
+ break
246
+
247
+ result = await self._run_action(action)
248
+ logger.debug("Executed action: %s | Result: %s", action, result)
249
+
250
+ success = False
251
+ result_for_history = ""
252
+
253
+ if isinstance(result, str):
254
+ success = "Error" not in result
255
+ result_for_history = result
256
+ elif isinstance(result, dict):
257
+ success = result.get('success', False)
258
+ result_for_history = result.get('message', str(result))
259
+ else:
260
+ # Fallback case
261
+ success = False
262
+ result_for_history = str(result)
263
+
264
+ self.action_history.append(
265
+ {
266
+ "action": action,
267
+ "result": result_for_history,
268
+ "success": success,
269
+ }
270
+ )
271
+
272
+ diff_snapshot = await self._session.get_snapshot(
273
+ force_refresh=ActionExecutor.should_update_snapshot(action),
274
+ diff_only=True,
275
+ )
276
+ assert self._session.snapshot is not None
277
+ meta = self._session.snapshot.last_info
278
+ logger.debug(
279
+ "Snapshot after action (diff=%s):\n%s",
280
+ meta["is_diff"],
281
+ diff_snapshot,
282
+ )
283
+
284
+ # Update full snapshot if page changed
285
+ if meta["is_diff"] and not diff_snapshot.startswith(
286
+ "- Page Snapshot (no structural changes)"
287
+ ):
288
+ assert self._session.snapshot is not None
289
+ full_snapshot = self._session.snapshot.snapshot_data or ""
290
+
291
+ action = self._llm_call(
292
+ prompt,
293
+ full_snapshot or "",
294
+ is_initial=False,
295
+ history=self.action_history,
296
+ ).get("action")
297
+ steps += 1
298
+
299
+ logger.info("Process completed with %d steps", steps)
300
+
301
+ async def _run_action(
302
+ self, action: Dict[str, Any]
303
+ ) -> Union[str, Dict[str, Any]]:
304
+ r"""Execute a single action and return the result."""
305
+ if action.get("type") == "navigate":
306
+ return await self.navigate(action.get("url", ""))
307
+ return await self._session.exec_action(action)
308
+
309
+ async def close(self):
310
+ r"""Clean up browser session and resources."""
311
+ await self._session.close()