camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/_utils.py +38 -0
  4. camel/agents/chat_agent.py +4014 -410
  5. camel/agents/mcp_agent.py +30 -27
  6. camel/agents/repo_agent.py +2 -1
  7. camel/benchmarks/browsecomp.py +6 -6
  8. camel/configs/__init__.py +15 -0
  9. camel/configs/aihubmix_config.py +88 -0
  10. camel/configs/amd_config.py +70 -0
  11. camel/configs/cometapi_config.py +104 -0
  12. camel/configs/minimax_config.py +93 -0
  13. camel/configs/nebius_config.py +103 -0
  14. camel/configs/vllm_config.py +2 -0
  15. camel/data_collectors/alpaca_collector.py +15 -6
  16. camel/datagen/self_improving_cot.py +1 -1
  17. camel/datasets/base_generator.py +39 -10
  18. camel/environments/__init__.py +12 -0
  19. camel/environments/rlcards_env.py +860 -0
  20. camel/environments/single_step.py +28 -3
  21. camel/environments/tic_tac_toe.py +1 -1
  22. camel/interpreters/__init__.py +2 -0
  23. camel/interpreters/docker/Dockerfile +4 -16
  24. camel/interpreters/docker_interpreter.py +3 -2
  25. camel/interpreters/e2b_interpreter.py +34 -1
  26. camel/interpreters/internal_python_interpreter.py +51 -2
  27. camel/interpreters/microsandbox_interpreter.py +395 -0
  28. camel/loaders/__init__.py +11 -2
  29. camel/loaders/base_loader.py +85 -0
  30. camel/loaders/chunkr_reader.py +9 -0
  31. camel/loaders/firecrawl_reader.py +4 -4
  32. camel/logger.py +1 -1
  33. camel/memories/agent_memories.py +84 -1
  34. camel/memories/base.py +34 -0
  35. camel/memories/blocks/chat_history_block.py +122 -4
  36. camel/memories/blocks/vectordb_block.py +8 -1
  37. camel/memories/context_creators/score_based.py +29 -237
  38. camel/memories/records.py +88 -8
  39. camel/messages/base.py +166 -40
  40. camel/messages/func_message.py +32 -5
  41. camel/models/__init__.py +10 -0
  42. camel/models/aihubmix_model.py +83 -0
  43. camel/models/aiml_model.py +1 -16
  44. camel/models/amd_model.py +101 -0
  45. camel/models/anthropic_model.py +117 -18
  46. camel/models/aws_bedrock_model.py +2 -33
  47. camel/models/azure_openai_model.py +205 -91
  48. camel/models/base_audio_model.py +3 -1
  49. camel/models/base_model.py +189 -24
  50. camel/models/cohere_model.py +5 -17
  51. camel/models/cometapi_model.py +83 -0
  52. camel/models/crynux_model.py +1 -16
  53. camel/models/deepseek_model.py +6 -16
  54. camel/models/fish_audio_model.py +6 -0
  55. camel/models/gemini_model.py +71 -20
  56. camel/models/groq_model.py +1 -17
  57. camel/models/internlm_model.py +1 -16
  58. camel/models/litellm_model.py +49 -32
  59. camel/models/lmstudio_model.py +1 -17
  60. camel/models/minimax_model.py +83 -0
  61. camel/models/mistral_model.py +1 -16
  62. camel/models/model_factory.py +27 -1
  63. camel/models/model_manager.py +24 -6
  64. camel/models/modelscope_model.py +1 -16
  65. camel/models/moonshot_model.py +185 -19
  66. camel/models/nebius_model.py +83 -0
  67. camel/models/nemotron_model.py +0 -5
  68. camel/models/netmind_model.py +1 -16
  69. camel/models/novita_model.py +1 -16
  70. camel/models/nvidia_model.py +1 -16
  71. camel/models/ollama_model.py +4 -19
  72. camel/models/openai_compatible_model.py +171 -46
  73. camel/models/openai_model.py +205 -77
  74. camel/models/openrouter_model.py +1 -17
  75. camel/models/ppio_model.py +1 -16
  76. camel/models/qianfan_model.py +1 -16
  77. camel/models/qwen_model.py +1 -16
  78. camel/models/reka_model.py +1 -16
  79. camel/models/samba_model.py +34 -47
  80. camel/models/sglang_model.py +64 -31
  81. camel/models/siliconflow_model.py +1 -16
  82. camel/models/stub_model.py +0 -4
  83. camel/models/togetherai_model.py +1 -16
  84. camel/models/vllm_model.py +1 -16
  85. camel/models/volcano_model.py +0 -17
  86. camel/models/watsonx_model.py +1 -16
  87. camel/models/yi_model.py +1 -16
  88. camel/models/zhipuai_model.py +60 -16
  89. camel/parsers/__init__.py +18 -0
  90. camel/parsers/mcp_tool_call_parser.py +176 -0
  91. camel/retrievers/auto_retriever.py +1 -0
  92. camel/runtimes/configs.py +11 -11
  93. camel/runtimes/daytona_runtime.py +15 -16
  94. camel/runtimes/docker_runtime.py +6 -6
  95. camel/runtimes/remote_http_runtime.py +5 -5
  96. camel/services/agent_openapi_server.py +380 -0
  97. camel/societies/__init__.py +2 -0
  98. camel/societies/role_playing.py +26 -28
  99. camel/societies/workforce/__init__.py +2 -0
  100. camel/societies/workforce/events.py +122 -0
  101. camel/societies/workforce/prompts.py +249 -38
  102. camel/societies/workforce/role_playing_worker.py +82 -20
  103. camel/societies/workforce/single_agent_worker.py +634 -34
  104. camel/societies/workforce/structured_output_handler.py +512 -0
  105. camel/societies/workforce/task_channel.py +169 -23
  106. camel/societies/workforce/utils.py +176 -9
  107. camel/societies/workforce/worker.py +77 -23
  108. camel/societies/workforce/workflow_memory_manager.py +772 -0
  109. camel/societies/workforce/workforce.py +3168 -478
  110. camel/societies/workforce/workforce_callback.py +74 -0
  111. camel/societies/workforce/workforce_logger.py +203 -175
  112. camel/societies/workforce/workforce_metrics.py +33 -0
  113. camel/storages/__init__.py +4 -0
  114. camel/storages/key_value_storages/json.py +15 -2
  115. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  116. camel/storages/object_storages/google_cloud.py +1 -1
  117. camel/storages/vectordb_storages/__init__.py +6 -0
  118. camel/storages/vectordb_storages/chroma.py +731 -0
  119. camel/storages/vectordb_storages/oceanbase.py +13 -13
  120. camel/storages/vectordb_storages/pgvector.py +349 -0
  121. camel/storages/vectordb_storages/qdrant.py +3 -3
  122. camel/storages/vectordb_storages/surreal.py +365 -0
  123. camel/storages/vectordb_storages/tidb.py +8 -6
  124. camel/tasks/task.py +244 -27
  125. camel/toolkits/__init__.py +46 -8
  126. camel/toolkits/aci_toolkit.py +64 -19
  127. camel/toolkits/arxiv_toolkit.py +6 -6
  128. camel/toolkits/base.py +63 -5
  129. camel/toolkits/code_execution.py +28 -1
  130. camel/toolkits/context_summarizer_toolkit.py +684 -0
  131. camel/toolkits/craw4ai_toolkit.py +93 -0
  132. camel/toolkits/dappier_toolkit.py +10 -6
  133. camel/toolkits/dingtalk.py +1135 -0
  134. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  135. camel/toolkits/excel_toolkit.py +901 -67
  136. camel/toolkits/file_toolkit.py +1402 -0
  137. camel/toolkits/function_tool.py +30 -6
  138. camel/toolkits/github_toolkit.py +107 -20
  139. camel/toolkits/gmail_toolkit.py +1839 -0
  140. camel/toolkits/google_calendar_toolkit.py +38 -4
  141. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  142. camel/toolkits/human_toolkit.py +34 -10
  143. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  144. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  145. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  146. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  147. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  148. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
  149. camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
  150. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  151. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
  152. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  153. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
  154. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  155. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  156. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  157. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  158. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
  159. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  160. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  161. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
  162. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  163. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  164. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  165. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  166. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  167. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  168. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  169. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  170. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  171. camel/toolkits/image_generation_toolkit.py +390 -0
  172. camel/toolkits/jina_reranker_toolkit.py +3 -4
  173. camel/toolkits/klavis_toolkit.py +5 -1
  174. camel/toolkits/markitdown_toolkit.py +104 -0
  175. camel/toolkits/math_toolkit.py +64 -10
  176. camel/toolkits/mcp_toolkit.py +370 -45
  177. camel/toolkits/memory_toolkit.py +5 -1
  178. camel/toolkits/message_agent_toolkit.py +608 -0
  179. camel/toolkits/message_integration.py +724 -0
  180. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  181. camel/toolkits/note_taking_toolkit.py +277 -0
  182. camel/toolkits/notion_mcp_toolkit.py +224 -0
  183. camel/toolkits/openbb_toolkit.py +5 -1
  184. camel/toolkits/origene_mcp_toolkit.py +56 -0
  185. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  186. camel/toolkits/pptx_toolkit.py +25 -12
  187. camel/toolkits/resend_toolkit.py +168 -0
  188. camel/toolkits/screenshot_toolkit.py +213 -0
  189. camel/toolkits/search_toolkit.py +437 -142
  190. camel/toolkits/slack_toolkit.py +104 -50
  191. camel/toolkits/sympy_toolkit.py +1 -1
  192. camel/toolkits/task_planning_toolkit.py +3 -3
  193. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  194. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  195. camel/toolkits/terminal_toolkit/utils.py +532 -0
  196. camel/toolkits/thinking_toolkit.py +1 -1
  197. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  198. camel/toolkits/video_analysis_toolkit.py +106 -26
  199. camel/toolkits/video_download_toolkit.py +17 -14
  200. camel/toolkits/web_deploy_toolkit.py +1219 -0
  201. camel/toolkits/wechat_official_toolkit.py +483 -0
  202. camel/toolkits/zapier_toolkit.py +5 -1
  203. camel/types/__init__.py +2 -2
  204. camel/types/agents/tool_calling_record.py +4 -1
  205. camel/types/enums.py +316 -40
  206. camel/types/openai_types.py +2 -2
  207. camel/types/unified_model_type.py +31 -4
  208. camel/utils/commons.py +36 -5
  209. camel/utils/constants.py +3 -0
  210. camel/utils/context_utils.py +1003 -0
  211. camel/utils/mcp.py +138 -4
  212. camel/utils/mcp_client.py +45 -1
  213. camel/utils/message_summarizer.py +148 -0
  214. camel/utils/token_counting.py +43 -20
  215. camel/utils/tool_result.py +44 -0
  216. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
  217. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
  218. camel/loaders/pandas_reader.py +0 -368
  219. camel/toolkits/dalle_toolkit.py +0 -175
  220. camel/toolkits/file_write_toolkit.py +0 -444
  221. camel/toolkits/openai_agent_toolkit.py +0 -135
  222. camel/toolkits/terminal_toolkit.py +0 -1037
  223. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  224. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,17 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
16
+
17
+ __all__ = ["HybridBrowserToolkit"]
@@ -0,0 +1,575 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import asyncio
15
+ from typing import TYPE_CHECKING, Any, Dict, Optional
16
+
17
+ from .config_loader import ConfigLoader
18
+
19
+ if TYPE_CHECKING:
20
+ from playwright.async_api import Page
21
+
22
+
23
+ class ActionExecutor:
24
+ r"""Executes high-level actions (click, type …) on a Playwright Page."""
25
+
26
def __init__(
    self,
    page: "Page",
    session: Optional[Any] = None,
    default_timeout: Optional[int] = None,
    short_timeout: Optional[int] = None,
    max_scroll_amount: Optional[int] = None,
):
    r"""Bind the executor to a page and resolve its tunable limits.

    Args:
        page (Page): Playwright page the actions are executed against.
        session (Optional[Any]): Owning browser session, used by the
            click handler to register and switch to newly opened tabs.
            (default: :obj:`None`)
        default_timeout (Optional[int]): Override passed to
            ``ConfigLoader.get_action_timeout``. (default: :obj:`None`)
        short_timeout (Optional[int]): Override passed to
            ``ConfigLoader.get_short_timeout``. (default: :obj:`None`)
        max_scroll_amount (Optional[int]): Override passed to
            ``ConfigLoader.get_max_scroll_amount``.
            (default: :obj:`None`)
    """
    # Every limit goes through ConfigLoader, which receives the caller's
    # optional override and returns the value to use.
    resolved_default = ConfigLoader.get_action_timeout(default_timeout)
    resolved_short = ConfigLoader.get_short_timeout(short_timeout)
    resolved_scroll = ConfigLoader.get_max_scroll_amount(max_scroll_amount)

    self.page = page
    self.session = session  # HybridBrowserSession instance
    self.default_timeout = resolved_default
    self.short_timeout = resolved_short
    self.max_scroll_amount = resolved_scroll
43
+
44
+ # ------------------------------------------------------------------
45
+ # Public helpers
46
+ # ------------------------------------------------------------------
47
async def execute(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Dispatch an action dict to its handler and wrap the outcome.

    Args:
        action (Dict[str, Any]): Action payload; its ``type`` key selects
            the handler.

    Returns:
        Dict[str, Any]: A dict with ``success``, ``message`` and
            ``details``. ``success`` is ``False`` for an empty action, a
            missing or unknown type, or when the handler raises;
            otherwise it is ``True`` and ``message``/``details`` are
            taken from the handler's result.
    """

    def _failure(message: str, details: Dict[str, Any]) -> Dict[str, Any]:
        # Shared shape for every unsuccessful outcome.
        return {"success": False, "message": message, "details": details}

    if not action:
        return _failure("No action to execute", {})

    action_type = action.get("type")
    if not action_type:
        return _failure("Error: action has no type", {})

    try:
        # The lookup stays inside the try block so that an unusable
        # (e.g. unhashable) action type is reported as an execution error.
        dispatch = {
            "click": self._click,
            "type": self._type,
            "select": self._select,
            "wait": self._wait,
            "extract": self._extract,
            "scroll": self._scroll,
            "enter": self._enter,
            "mouse_control": self._mouse_control,
            "mouse_drag": self._mouse_drag,
            "press_key": self._press_key,
        }
        handler = dispatch.get(action_type)
        if handler is None:
            return _failure(
                f"Error: Unknown action type '{action_type}'",
                {"action_type": action_type},
            )

        outcome = await handler(action)
        return {
            "success": True,
            "message": outcome["message"],
            "details": outcome.get("details", {}),
        }
    except Exception as exc:
        return _failure(
            f"Error executing {action_type}: {exc}",
            {"action_type": action_type, "error": str(exc)},
        )
100
+
101
+ # ------------------------------------------------------------------
102
+ # Internal handlers
103
+ # ------------------------------------------------------------------
104
async def _click(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Click an element, opening the target in a new tab when possible.

    The element is located with the first strategy that matches, tried in
    priority order: ``ref`` (as an ``aria-ref`` attribute selector), then
    ``selector``, then ``text``. A ctrl/meta click is attempted first; a
    forced plain click is used only as a fallback when the ctrl/meta click
    itself did not go through.

    Args:
        action (Dict[str, Any]): Action payload. At least one of the keys
            ``ref``, ``selector`` or ``text`` must be truthy.

    Returns:
        Dict[str, Any]: A dict with a human-readable ``message`` and a
            ``details`` dict recording the selector and click method used,
            any failed attempts, and whether a new tab was created.
    """
    ref = action.get("ref")
    text = action.get("text")
    selector = action.get("selector")
    if not (ref or text or selector):
        return {
            "message": "Error: click requires ref/text/selector",
            "details": {"error": "missing_selector"},
        }

    # Build strategies in priority order
    strategies = []
    if ref:
        strategies.append(f"[aria-ref='{ref}']")
    if selector:
        strategies.append(selector)
    if text:
        strategies.append(f'text="{text}"')

    details: Dict[str, Any] = {
        "ref": ref,
        "selector": selector,
        "text": text,
        "strategies_tried": [],
        "successful_strategy": None,
        "click_method": None,
        "new_tab_created": False,
    }

    # Find the first valid selector
    found_selector = None
    for sel in strategies:
        if await self.page.locator(sel).count() > 0:
            found_selector = sel
            break

    if not found_selector:
        details['error'] = "Element not found with any strategy"
        return {
            "message": "Error: Click failed, element not found",
            "details": details,
        }

    element = self.page.locator(found_selector).first
    details['successful_strategy'] = found_selector

    # Playwright raises its own TimeoutError, which is unrelated to
    # asyncio.TimeoutError, so it has to be caught explicitly. The import
    # is local because playwright is only imported for type checking at
    # module level.
    try:
        from playwright.async_api import (
            TimeoutError as PlaywrightTimeoutError,
        )
    except ImportError:
        PlaywrightTimeoutError = asyncio.TimeoutError  # type: ignore

    # Track progress so that a timeout while waiting for a new tab is not
    # mistaken for a failed click (which would click the element twice).
    ctrl_clicked = False
    new_page = None

    # Attempt ctrl+click first (always)
    try:
        if self.session:
            async with self.page.context.expect_page(
                timeout=self.short_timeout
            ) as new_page_info:
                await element.click(modifiers=["ControlOrMeta"])
                ctrl_clicked = True
            new_page = await new_page_info.value
            await new_page.wait_for_load_state('domcontentloaded')
            new_tab_index = await self.session.register_page(new_page)
            if new_tab_index is not None:
                await self.session.switch_to_tab(new_tab_index)
                self.page = new_page
            details.update(
                {
                    "click_method": "ctrl_click_new_tab",
                    "new_tab_created": True,
                    "new_tab_index": new_tab_index,
                }
            )
            return {
                "message": f"Clicked element (ctrl click), opened in new "
                f"tab {new_tab_index}",
                "details": details,
            }
        else:
            await element.click(modifiers=["ControlOrMeta"])
            details["click_method"] = "ctrl_click_no_session"
            return {
                "message": f"Clicked element (ctrl click, no"
                f" session): {found_selector}",
                "details": details,
            }
    except (asyncio.TimeoutError, PlaywrightTimeoutError) as e:
        if ctrl_clicked and new_page is None:
            # The click went through but no new tab appeared in time:
            # the action happened in the current tab, so do not re-click.
            details["click_method"] = "ctrl_click_same_tab"
            return {
                "message": f"Clicked element (ctrl click, "
                f"same tab): {found_selector}",
                "details": details,
            }
        details['strategies_tried'].append(
            {
                'selector': found_selector,
                'method': 'ctrl_click',
                'error': str(e),
            }
        )
        # Fall through to fallback
    except Exception as e:
        details['strategies_tried'].append(
            {
                'selector': found_selector,
                'method': 'ctrl_click',
                'error': str(e),
            }
        )
        # Fall through to fallback

    # Fallback to normal force click if ctrl+click fails
    try:
        await element.click(force=True, timeout=self.default_timeout)
        details["click_method"] = "playwright_force_click"
        return {
            "message": f"Fallback clicked element: {found_selector}",
            "details": details,
        }
    except Exception as e:
        details["click_method"] = "playwright_force_click_failed"
        details["error"] = str(e)
        return {
            "message": f"Error: All click strategies "
            f"failed for {found_selector}",
            "details": details,
        }
219
+
220
async def _type(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Fill an input element with text.

    Args:
        action (Dict[str, Any]): Action payload. Needs a truthy ``ref`` or
            ``selector``; ``text`` defaults to an empty string. When both
            are given, ``selector`` takes precedence over ``ref``.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``; on
            failure ``details`` carries the error text under ``error``.
    """
    ref = action.get("ref")
    selector = action.get("selector")
    text = action.get("text", "")

    if not ref and not selector:
        return {
            "message": "Error: type requires ref/selector",
            "details": {"error": "missing_selector"},
        }

    # An explicit selector wins; otherwise address the element by its ref.
    if selector:
        target = selector
    else:
        target = f"[aria-ref='{ref}']"

    details = {
        "ref": ref,
        "selector": selector,
        "target": target,
        "text": text,
        "text_length": len(text),
    }

    try:
        await self.page.fill(target, text, timeout=self.short_timeout)
    except Exception as exc:
        details["error"] = str(exc)
        return {"message": f"Type failed: {exc}", "details": details}

    return {
        "message": f"Typed '{text}' into {target}",
        "details": details,
    }
249
+
250
async def _select(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Choose an option in a dropdown element.

    Args:
        action (Dict[str, Any]): Action payload. Needs a truthy ``ref`` or
            ``selector``; ``value`` defaults to an empty string. When both
            are given, ``selector`` takes precedence over ``ref``.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``; on
            failure ``details`` carries the error text under ``error``.
    """
    ref = action.get("ref")
    selector = action.get("selector")
    value = action.get("value", "")

    if not ref and not selector:
        return {
            "message": "Error: select requires ref/selector",
            "details": {"error": "missing_selector"},
        }

    # An explicit selector wins; otherwise address the element by its ref.
    if selector:
        target = selector
    else:
        target = f"[aria-ref='{ref}']"

    details = {
        "ref": ref,
        "selector": selector,
        "target": target,
        "value": value,
    }

    try:
        await self.page.select_option(
            target, value, timeout=self.default_timeout
        )
    except Exception as exc:
        details["error"] = str(exc)
        return {"message": f"Select failed: {exc}", "details": details}

    return {
        "message": f"Selected '{value}' in {target}",
        "details": details,
    }
280
+
281
async def _wait(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Pause for a fixed time or until a selector appears.

    Args:
        action (Dict[str, Any]): Action payload. If it has a ``timeout``
            key (milliseconds) the handler sleeps for that long;
            otherwise, if it has a ``selector`` key, the handler waits for
            that selector on the page. ``timeout`` is checked first.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``
            (``wait_type``, ``timeout``, ``selector``).
    """
    details: Dict[str, Any] = {
        "wait_type": None,
        "timeout": None,
        "selector": None,
    }

    has_timeout = "timeout" in action
    if not has_timeout and "selector" not in action:
        return {
            "message": "Error: wait requires timeout/selector",
            "details": details,
        }

    if has_timeout:
        millis = int(action["timeout"])
        details.update({"wait_type": "timeout", "timeout": millis})
        # asyncio.sleep takes seconds.
        await asyncio.sleep(millis / 1000)
        return {"message": f"Waited {millis}ms", "details": details}

    wanted = action["selector"]
    details.update({"wait_type": "selector", "selector": wanted})
    await self.page.wait_for_selector(wanted, timeout=self.default_timeout)
    return {"message": f"Waited for {wanted}", "details": details}
307
+
308
async def _extract(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Read the text content of the element identified by ``ref``.

    Args:
        action (Dict[str, Any]): Action payload with a truthy ``ref``.

    Returns:
        Dict[str, Any]: A dict whose ``message`` previews at most the
            first 100 characters of the text and whose ``details`` hold
            the full text and its length.
    """
    ref = action.get("ref")
    if not ref:
        return {
            "message": "Error: extract requires ref",
            "details": {"error": "missing_ref"},
        }

    target = f"[aria-ref='{ref}']"

    # Make sure the element exists before reading it.
    await self.page.wait_for_selector(target, timeout=self.default_timeout)
    content = await self.page.text_content(target)

    if content:
        preview = content[:100]
        length = len(content)
    else:
        # text_content may yield nothing; report it as the word 'None'.
        preview = 'None'
        length = 0

    return {
        "message": f"Extracted: {preview}",
        "details": {
            "ref": ref,
            "target": target,
            "extracted_text": content,
            "text_length": length,
        },
    }
330
+
331
async def _scroll(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Scroll the page vertically by a validated, clamped amount.

    Args:
        action (Dict[str, Any]): Action payload. ``direction`` must be
            ``"up"`` or ``"down"`` (default ``"down"``); ``amount`` is the
            distance in pixels (default ``300``) and is clamped to
            ``[-max_scroll_amount, max_scroll_amount]``.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``
            (requested and actual amounts plus the signed scroll offset).
            Invalid input yields an error message instead of scrolling.
    """
    direction = action.get("direction", "down")
    amount = action.get("amount", 300)

    details = {
        "direction": direction,
        "requested_amount": amount,
        "actual_amount": None,
        "scroll_offset": None,
    }

    # Validate inputs to prevent injection
    if direction not in ("up", "down"):
        return {
            "message": "Error: direction must be 'up' or 'down'",
            "details": details,
        }

    try:
        # Safely convert amount to integer and clamp to reasonable range
        amount_int = int(amount)
        amount_int = max(
            -self.max_scroll_amount,
            min(self.max_scroll_amount, amount_int),
        )  # Clamp to max_scroll_amount range
        details["actual_amount"] = amount_int
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers infinite floats, which int() rejects with
        # a different exception type than other malformed numbers.
        return {
            "message": "Error: amount must be a valid number",
            "details": details,
        }

    # The offset is passed to the page as a bound argument rather than
    # being interpolated into the script text.
    scroll_offset = amount_int if direction == "down" else -amount_int
    details["scroll_offset"] = scroll_offset

    await self.page.evaluate(
        "offset => window.scrollBy(0, offset)", scroll_offset
    )
    # Short pause after scrolling before reporting the result.
    await asyncio.sleep(0.5)
    return {
        "message": f"Scrolled {direction} by {abs(amount_int)}px",
        "details": details,
    }
376
+
377
async def _enter(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Send an Enter key press to the page's keyboard.

    Args:
        action (Dict[str, Any]): Action payload; not inspected.

    Returns:
        Dict[str, Any]: A dict with a fixed ``message`` and ``details``.
    """
    # No element is targeted: the key goes to whatever currently has focus.
    await self.page.keyboard.press("Enter")

    outcome = {
        "message": "Pressed Enter on focused element",
        "details": {"action_type": "enter", "target": "focused_element"},
    }
    return outcome
387
+
388
async def _mouse_control(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Perform a mouse click variant at viewport coordinates.

    Args:
        action (Dict[str, Any]): Action payload. ``control`` is one of
            ``"click"``, ``"right_click"`` or ``"dblclick"`` (default
            ``"click"``); ``x`` and ``y`` default to ``0``.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``. Invalid
            coordinates and mouse errors are reported in ``message``
            instead of being raised.
    """
    control = action.get("control", "click")
    x_coord = action.get("x", 0)
    y_coord = action.get("y", 0)

    details = {
        "action_type": "mouse_control",
        "target": f"coordinates : ({x_coord}, {y_coord})",
    }

    try:
        # Coordinates are validated before the control name is looked at.
        if not self._valid_coordinates(x_coord, y_coord):
            raise ValueError(
                "Invalid coordinates, outside viewport bounds :"
                f"({x_coord}, {y_coord})"
            )

        mouse = self.page.mouse
        if control == "click":
            await mouse.click(x_coord, y_coord)
            message = "Action 'click' performed on the target"
        elif control == "right_click":
            await mouse.click(x_coord, y_coord, button="right")
            message = "Action 'right_click' performed on the target"
        elif control == "dblclick":
            await mouse.dblclick(x_coord, y_coord)
            message = "Action 'dblclick' performed on the target"
        else:
            return {
                "message": f"Invalid control action {control}",
                "details": details,
            }
    except Exception as e:
        return {"message": f"Action failed: {e}", "details": details}

    return {"message": message, "details": details}
428
+
429
async def _mouse_drag(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Drag with the mouse from one referenced element to another.

    The drag starts at the centre of the source element's bounding box
    and ends at the centre of the target element's bounding box.

    Args:
        action (Dict[str, Any]): Action payload with truthy ``from_ref``
            and ``to_ref`` values.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``. Lookup
            and mouse failures are reported in ``message`` instead of
            being raised.
    """
    from_ref = action.get("from_ref")
    to_ref = action.get("to_ref")

    if not (from_ref and to_ref):
        return {
            "message": "Error: mouse_drag requires from_ref and to_ref",
            "details": {"error": "missing_refs"},
        }

    from_selector = f"[aria-ref='{from_ref}']"
    to_selector = f"[aria-ref='{to_ref}']"

    details = {
        "action_type": "mouse_drag",
        "from_ref": from_ref,
        "to_ref": to_ref,
        "from_selector": from_selector,
        "to_selector": to_selector,
    }

    def _center(box: Dict[str, Any]) -> Any:
        # Midpoint of a bounding box given as x/y/width/height.
        return (
            box['x'] + box['width'] / 2,
            box['y'] + box['height'] / 2,
        )

    try:
        # Both endpoints must exist before any geometry is queried.
        source = self.page.locator(from_selector)
        if await source.count() == 0:
            raise ValueError(
                f"Source element with ref '{from_ref}' not found"
            )

        destination = self.page.locator(to_selector)
        if await destination.count() == 0:
            raise ValueError(
                f"Target element with ref '{to_ref}' not found"
            )

        source_box = await source.first.bounding_box()
        destination_box = await destination.first.bounding_box()

        if not source_box:
            raise ValueError(
                f"Could not get bounding box for source element "
                f"with ref '{from_ref}'"
            )
        if not destination_box:
            raise ValueError(
                f"Could not get bounding box for target element "
                f"with ref '{to_ref}'"
            )

        start_x, start_y = _center(source_box)
        end_x, end_y = _center(destination_box)

        details.update(
            {
                "from_coordinates": {"x": start_x, "y": start_y},
                "to_coordinates": {"x": end_x, "y": end_y},
            }
        )

        # Press at the source, move to the destination, then release.
        mouse = self.page.mouse
        await mouse.move(start_x, start_y)
        await mouse.down()
        await mouse.move(end_x, end_y)
        await mouse.up()

        return {
            "message": (
                f"Dragged from element [ref={from_ref}] to element "
                f"[ref={to_ref}]"
            ),
            "details": details,
        }
    except Exception as e:
        return {"message": f"Action failed: {e}", "details": details}
512
+
513
async def _press_key(self, action: Dict[str, Any]) -> Dict[str, Any]:
    r"""Press a key or key combination on the page's keyboard.

    Args:
        action (Dict[str, Any]): Action payload. ``keys`` is a list of key
            names that are pressed together (joined with ``+``). A single
            string is also accepted and treated as one key or an already
            combined shortcut.

    Returns:
        Dict[str, Any]: A dict with ``message`` and ``details``; keyboard
            errors are reported in ``message`` instead of being raised.
    """
    keys = action.get("keys", [])
    if not keys:
        return {
            "message": "Error: No keys specified",
            "details": {"action_type": "press_key", "keys": ""},
        }

    # Joining a bare string would split it into single characters
    # ("Enter" -> "E+n+t+e+r"), so wrap it as a one-element list first.
    if isinstance(keys, str):
        keys = [keys]

    combined_keys = "+".join(keys)
    details = {"action_type": "press_key", "keys": combined_keys}
    try:
        await self.page.keyboard.press(combined_keys)
        return {
            "message": "Pressed keys in the browser",
            "details": details,
        }
    except Exception as e:
        return {"message": f"Action failed: {e}", "details": details}
531
+
532
+ # utilities
533
async def _wait_dom_stable(self) -> None:
    r"""Best-effort wait for the page to settle.

    Waits for the ``domcontentloaded`` load state and then, if that
    succeeded, for ``networkidle``, each bounded by ``short_timeout``.
    Any failure is ignored, so this never raises.
    """
    page = self.page
    limit = self.short_timeout

    try:
        await page.wait_for_load_state('domcontentloaded', timeout=limit)
    except Exception:
        # Don't fail if the wait times out; skip the network-idle step.
        return

    try:
        await page.wait_for_load_state('networkidle', timeout=limit)
    except Exception:
        # Network idle is optional.
        return
551
+
552
def _valid_coordinates(self, x_coord: float, y_coord: float) -> bool:
    r"""Check that a point lies inside the page viewport (edges included).

    Args:
        x_coord (float): Horizontal coordinate to check.
        y_coord (float): Vertical coordinate to check.

    Returns:
        bool: ``True`` if ``0 <= x_coord <= width`` and
            ``0 <= y_coord <= height`` of the viewport.

    Raises:
        ValueError: If the page reports no viewport size.
    """
    bounds = self.page.viewport_size
    if not bounds:
        raise ValueError("Viewport size not available from current page.")

    within_width = 0 <= x_coord <= bounds['width']
    return within_width and 0 <= y_coord <= bounds['height']
562
+
563
+ # static helpers
564
+ @staticmethod
565
+ def should_update_snapshot(action: Dict[str, Any]) -> bool:
566
+ r"""Determine if an action requires a snapshot update."""
567
+ change_types = {
568
+ "click",
569
+ "type",
570
+ "select",
571
+ "scroll",
572
+ "navigate",
573
+ "enter",
574
+ }
575
+ return action.get("type") in change_types