camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +2217 -519
  4. camel/agents/mcp_agent.py +30 -27
  5. camel/configs/__init__.py +15 -0
  6. camel/configs/aihubmix_config.py +88 -0
  7. camel/configs/amd_config.py +70 -0
  8. camel/configs/cometapi_config.py +104 -0
  9. camel/configs/minimax_config.py +93 -0
  10. camel/configs/nebius_config.py +103 -0
  11. camel/data_collectors/alpaca_collector.py +15 -6
  12. camel/datasets/base_generator.py +39 -10
  13. camel/environments/single_step.py +28 -3
  14. camel/environments/tic_tac_toe.py +1 -1
  15. camel/interpreters/__init__.py +2 -0
  16. camel/interpreters/docker/Dockerfile +3 -12
  17. camel/interpreters/e2b_interpreter.py +34 -1
  18. camel/interpreters/microsandbox_interpreter.py +395 -0
  19. camel/loaders/__init__.py +11 -2
  20. camel/loaders/chunkr_reader.py +9 -0
  21. camel/memories/agent_memories.py +48 -4
  22. camel/memories/base.py +26 -0
  23. camel/memories/blocks/chat_history_block.py +122 -4
  24. camel/memories/context_creators/score_based.py +25 -384
  25. camel/memories/records.py +88 -8
  26. camel/messages/base.py +153 -34
  27. camel/models/__init__.py +10 -0
  28. camel/models/aihubmix_model.py +83 -0
  29. camel/models/aiml_model.py +1 -16
  30. camel/models/amd_model.py +101 -0
  31. camel/models/anthropic_model.py +6 -19
  32. camel/models/aws_bedrock_model.py +2 -33
  33. camel/models/azure_openai_model.py +114 -89
  34. camel/models/base_audio_model.py +3 -1
  35. camel/models/base_model.py +32 -14
  36. camel/models/cohere_model.py +1 -16
  37. camel/models/cometapi_model.py +83 -0
  38. camel/models/crynux_model.py +1 -16
  39. camel/models/deepseek_model.py +1 -16
  40. camel/models/fish_audio_model.py +6 -0
  41. camel/models/gemini_model.py +36 -18
  42. camel/models/groq_model.py +1 -17
  43. camel/models/internlm_model.py +1 -16
  44. camel/models/litellm_model.py +1 -16
  45. camel/models/lmstudio_model.py +1 -17
  46. camel/models/minimax_model.py +83 -0
  47. camel/models/mistral_model.py +1 -16
  48. camel/models/model_factory.py +27 -1
  49. camel/models/modelscope_model.py +1 -16
  50. camel/models/moonshot_model.py +105 -24
  51. camel/models/nebius_model.py +83 -0
  52. camel/models/nemotron_model.py +0 -5
  53. camel/models/netmind_model.py +1 -16
  54. camel/models/novita_model.py +1 -16
  55. camel/models/nvidia_model.py +1 -16
  56. camel/models/ollama_model.py +4 -19
  57. camel/models/openai_compatible_model.py +62 -41
  58. camel/models/openai_model.py +62 -57
  59. camel/models/openrouter_model.py +1 -17
  60. camel/models/ppio_model.py +1 -16
  61. camel/models/qianfan_model.py +1 -16
  62. camel/models/qwen_model.py +1 -16
  63. camel/models/reka_model.py +1 -16
  64. camel/models/samba_model.py +34 -47
  65. camel/models/sglang_model.py +64 -31
  66. camel/models/siliconflow_model.py +1 -16
  67. camel/models/stub_model.py +0 -4
  68. camel/models/togetherai_model.py +1 -16
  69. camel/models/vllm_model.py +1 -16
  70. camel/models/volcano_model.py +0 -17
  71. camel/models/watsonx_model.py +1 -16
  72. camel/models/yi_model.py +1 -16
  73. camel/models/zhipuai_model.py +60 -16
  74. camel/parsers/__init__.py +18 -0
  75. camel/parsers/mcp_tool_call_parser.py +176 -0
  76. camel/retrievers/auto_retriever.py +1 -0
  77. camel/runtimes/daytona_runtime.py +11 -12
  78. camel/societies/__init__.py +2 -0
  79. camel/societies/workforce/__init__.py +2 -0
  80. camel/societies/workforce/events.py +122 -0
  81. camel/societies/workforce/prompts.py +146 -66
  82. camel/societies/workforce/role_playing_worker.py +15 -11
  83. camel/societies/workforce/single_agent_worker.py +302 -65
  84. camel/societies/workforce/structured_output_handler.py +30 -18
  85. camel/societies/workforce/task_channel.py +163 -27
  86. camel/societies/workforce/utils.py +107 -13
  87. camel/societies/workforce/workflow_memory_manager.py +772 -0
  88. camel/societies/workforce/workforce.py +1949 -579
  89. camel/societies/workforce/workforce_callback.py +74 -0
  90. camel/societies/workforce/workforce_logger.py +168 -145
  91. camel/societies/workforce/workforce_metrics.py +33 -0
  92. camel/storages/key_value_storages/json.py +15 -2
  93. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  94. camel/storages/object_storages/google_cloud.py +1 -1
  95. camel/storages/vectordb_storages/oceanbase.py +13 -13
  96. camel/storages/vectordb_storages/qdrant.py +3 -3
  97. camel/storages/vectordb_storages/tidb.py +8 -6
  98. camel/tasks/task.py +4 -3
  99. camel/toolkits/__init__.py +20 -7
  100. camel/toolkits/aci_toolkit.py +45 -0
  101. camel/toolkits/base.py +6 -4
  102. camel/toolkits/code_execution.py +28 -1
  103. camel/toolkits/context_summarizer_toolkit.py +684 -0
  104. camel/toolkits/dappier_toolkit.py +5 -1
  105. camel/toolkits/dingtalk.py +1135 -0
  106. camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
  107. camel/toolkits/excel_toolkit.py +1 -1
  108. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
  109. camel/toolkits/function_tool.py +13 -3
  110. camel/toolkits/github_toolkit.py +104 -17
  111. camel/toolkits/gmail_toolkit.py +1839 -0
  112. camel/toolkits/google_calendar_toolkit.py +38 -4
  113. camel/toolkits/google_drive_mcp_toolkit.py +12 -31
  114. camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
  115. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
  116. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
  117. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  118. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
  119. camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
  120. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
  121. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
  122. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
  123. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  124. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  125. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  126. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
  127. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
  128. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
  129. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  130. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  131. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  132. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
  133. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  134. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  135. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
  136. camel/toolkits/klavis_toolkit.py +5 -1
  137. camel/toolkits/markitdown_toolkit.py +27 -1
  138. camel/toolkits/math_toolkit.py +64 -10
  139. camel/toolkits/mcp_toolkit.py +366 -71
  140. camel/toolkits/memory_toolkit.py +5 -1
  141. camel/toolkits/message_integration.py +18 -13
  142. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  143. camel/toolkits/note_taking_toolkit.py +19 -10
  144. camel/toolkits/notion_mcp_toolkit.py +16 -26
  145. camel/toolkits/openbb_toolkit.py +5 -1
  146. camel/toolkits/origene_mcp_toolkit.py +8 -49
  147. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  148. camel/toolkits/resend_toolkit.py +168 -0
  149. camel/toolkits/search_toolkit.py +264 -91
  150. camel/toolkits/slack_toolkit.py +64 -10
  151. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  152. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  153. camel/toolkits/terminal_toolkit/utils.py +532 -0
  154. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  155. camel/toolkits/video_analysis_toolkit.py +17 -11
  156. camel/toolkits/wechat_official_toolkit.py +483 -0
  157. camel/toolkits/zapier_toolkit.py +5 -1
  158. camel/types/__init__.py +2 -2
  159. camel/types/enums.py +274 -7
  160. camel/types/openai_types.py +2 -2
  161. camel/types/unified_model_type.py +15 -0
  162. camel/utils/commons.py +36 -5
  163. camel/utils/constants.py +3 -0
  164. camel/utils/context_utils.py +1003 -0
  165. camel/utils/mcp.py +138 -4
  166. camel/utils/token_counting.py +43 -20
  167. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
  168. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
  169. camel/loaders/pandas_reader.py +0 -368
  170. camel/toolkits/openai_agent_toolkit.py +0 -135
  171. camel/toolkits/terminal_toolkit.py +0 -1550
  172. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  173. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -13,12 +13,14 @@
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
 
15
15
  import asyncio
16
+ import contextlib
16
17
  import datetime
17
18
  import json
18
19
  import os
19
20
  import subprocess
20
21
  import time
21
22
  import uuid
23
+ from contextvars import ContextVar
22
24
  from functools import wraps
23
25
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
24
26
 
@@ -33,34 +35,76 @@ else:
33
35
  from camel.logger import get_logger
34
36
  from camel.utils.tool_result import ToolResult
35
37
 
38
+ from .installer import check_and_install_dependencies
39
+
36
40
  logger = get_logger(__name__)
37
41
 
42
+ # Context variable to track if we're inside a high-level action
43
+ _in_high_level_action: ContextVar[bool] = ContextVar(
44
+ '_in_high_level_action', default=False
45
+ )
46
+
47
+
48
+ def _create_memory_aware_error(base_msg: str) -> str:
49
+ import psutil
50
+
51
+ mem = psutil.virtual_memory()
52
+ if mem.available < 1024**3:
53
+ return (
54
+ f"{base_msg} "
55
+ f"(likely due to insufficient memory). "
56
+ f"Available memory: {mem.available / 1024**3:.2f}GB "
57
+ f"({mem.percent}% used)"
58
+ )
59
+ return base_msg
60
+
61
+
62
+ async def _cleanup_process_and_tasks(process, log_reader_task, ts_log_file):
63
+ if process:
64
+ with contextlib.suppress(ProcessLookupError, Exception):
65
+ process.kill()
66
+ with contextlib.suppress(Exception):
67
+ process.wait(timeout=2)
68
+
69
+ if log_reader_task and not log_reader_task.done():
70
+ log_reader_task.cancel()
71
+ with contextlib.suppress(asyncio.CancelledError):
72
+ await log_reader_task
73
+
74
+ if ts_log_file:
75
+ with contextlib.suppress(Exception):
76
+ ts_log_file.close()
77
+
38
78
 
39
79
  def action_logger(func):
40
- """Decorator to add logging to action methods."""
80
+ """Decorator to add logging to action methods.
81
+
82
+ Skips logging if already inside a high-level action to avoid
83
+ logging internal calls.
84
+ """
41
85
 
42
86
  @wraps(func)
43
87
  async def wrapper(self, *args, **kwargs):
88
+ # Skip logging if we're already inside a high-level action
89
+ if _in_high_level_action.get():
90
+ return await func(self, *args, **kwargs)
91
+
44
92
  action_name = func.__name__
45
93
  start_time = time.time()
46
94
 
47
- # Log inputs (skip self)
48
95
  inputs = {
49
96
  "args": args,
50
97
  "kwargs": kwargs,
51
98
  }
52
99
 
53
100
  try:
54
- # Execute the original function
55
101
  result = await func(self, *args, **kwargs)
56
102
  execution_time = time.time() - start_time
57
103
 
58
- # Extract page load time if available
59
104
  page_load_time = None
60
105
  if isinstance(result, dict) and 'page_load_time_ms' in result:
61
106
  page_load_time = result['page_load_time_ms'] / 1000.0
62
107
 
63
- # Log success
64
108
  await self._log_action(
65
109
  action_name=action_name,
66
110
  inputs=inputs,
@@ -75,7 +119,6 @@ def action_logger(func):
75
119
  execution_time = time.time() - start_time
76
120
  error_msg = f"{type(e).__name__}: {e!s}"
77
121
 
78
- # Log error
79
122
  await self._log_action(
80
123
  action_name=action_name,
81
124
  inputs=inputs,
@@ -89,6 +132,67 @@ def action_logger(func):
89
132
  return wrapper
90
133
 
91
134
 
135
+ def high_level_action(func):
136
+ """Decorator for high-level actions that should suppress low-level logging.
137
+
138
+ When a function is decorated with this, all low-level action_logger
139
+ decorated functions called within it will skip logging. This decorator
140
+ itself will log the high-level action.
141
+ """
142
+
143
+ @wraps(func)
144
+ async def wrapper(self, *args, **kwargs):
145
+ action_name = func.__name__
146
+ start_time = time.time()
147
+
148
+ inputs = {
149
+ "args": args,
150
+ "kwargs": kwargs,
151
+ }
152
+
153
+ # Set the context variable to indicate we're in a high-level action
154
+ token = _in_high_level_action.set(True)
155
+ try:
156
+ result = await func(self, *args, **kwargs)
157
+ execution_time = time.time() - start_time
158
+
159
+ # Log the high-level action
160
+ if hasattr(self, '_get_ws_wrapper'):
161
+ # This is a HybridBrowserToolkit instance
162
+ ws_wrapper = await self._get_ws_wrapper()
163
+ await ws_wrapper._log_action(
164
+ action_name=action_name,
165
+ inputs=inputs,
166
+ outputs=result,
167
+ execution_time=execution_time,
168
+ page_load_time=None,
169
+ )
170
+
171
+ return result
172
+
173
+ except Exception as e:
174
+ execution_time = time.time() - start_time
175
+ error_msg = f"{type(e).__name__}: {e!s}"
176
+
177
+ # Log the error
178
+ if hasattr(self, '_get_ws_wrapper'):
179
+ ws_wrapper = await self._get_ws_wrapper()
180
+ await ws_wrapper._log_action(
181
+ action_name=action_name,
182
+ inputs=inputs,
183
+ outputs=None,
184
+ execution_time=execution_time,
185
+ error=error_msg,
186
+ )
187
+
188
+ raise
189
+ finally:
190
+ # Reset the context variable
191
+ _in_high_level_action.reset(token)
192
+
193
+ return wrapper
194
+
195
+
92
196
  class WebSocketBrowserWrapper:
93
197
  """Python wrapper for the TypeScript hybrid browser
94
198
  toolkit implementation using WebSocket."""
@@ -110,29 +214,35 @@ class WebSocketBrowserWrapper:
110
214
  self.process: Optional[subprocess.Popen] = None
111
215
  self.websocket = None
112
216
  self.server_port = None
113
- self._send_lock = asyncio.Lock() # Lock for sending messages
114
- self._receive_task = None # Background task for receiving messages
115
- self._pending_responses: Dict[
116
- str, asyncio.Future[Dict[str, Any]]
117
- ] = {} # Message ID -> Future
217
+ self._send_lock = asyncio.Lock()
218
+ self._receive_task = None
219
+ self._pending_responses: Dict[str, asyncio.Future[Dict[str, Any]]] = {}
220
+ self._browser_opened = False
221
+ self._server_ready_future = None
118
222
 
119
- # Logging configuration
120
223
  self.browser_log_to_file = (config or {}).get(
121
224
  'browser_log_to_file', False
122
225
  )
226
+ self.log_dir = (config or {}).get('log_dir', 'browser_log')
123
227
  self.session_id = (config or {}).get('session_id', 'default')
124
228
  self.log_file_path: Optional[str] = None
125
229
  self.log_buffer: List[Dict[str, Any]] = []
230
+ self.ts_log_file_path: Optional[str] = None
231
+ self.ts_log_file = None
232
+ self._log_reader_task = None
126
233
 
127
- # Set up log file if needed
128
234
  if self.browser_log_to_file:
129
- log_dir = "browser_log"
235
+ log_dir = self.log_dir if self.log_dir else "browser_log"
130
236
  os.makedirs(log_dir, exist_ok=True)
131
237
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
132
238
  self.log_file_path = os.path.join(
133
239
  log_dir,
134
240
  f"hybrid_browser_toolkit_ws_{timestamp}_{self.session_id}.log",
135
241
  )
242
+ self.ts_log_file_path = os.path.join(
243
+ log_dir,
244
+ f"typescript_console_{timestamp}_{self.session_id}.log",
245
+ )
136
246
 
137
247
  async def __aenter__(self):
138
248
  """Async context manager entry."""
@@ -143,155 +253,278 @@ class WebSocketBrowserWrapper:
143
253
  """Async context manager exit."""
144
254
  await self.stop()
145
255
 
256
+ async def _cleanup_existing_processes(self):
257
+ """Clean up any existing Node.js WebSocket server processes."""
258
+ import psutil
259
+
260
+ cleaned_count = 0
261
+ for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
262
+ try:
263
+ if (
264
+ proc.info['name']
265
+ and 'node' in proc.info['name'].lower()
266
+ and proc.info['cmdline']
267
+ and any(
268
+ 'websocket-server.js' in arg
269
+ for arg in proc.info['cmdline']
270
+ )
271
+ ):
272
+ if any(self.ts_dir in arg for arg in proc.info['cmdline']):
273
+ logger.warning(
274
+ f"Found existing WebSocket server process "
275
+ f"(PID: {proc.info['pid']}). "
276
+ f"Terminating it to prevent conflicts."
277
+ )
278
+ proc.terminate()
279
+ try:
280
+ proc.wait(timeout=3)
281
+ except psutil.TimeoutExpired:
282
+ proc.kill()
283
+ cleaned_count += 1
284
+ except (
285
+ psutil.NoSuchProcess,
286
+ psutil.AccessDenied,
287
+ psutil.ZombieProcess,
288
+ ):
289
+ pass
290
+
291
+ if cleaned_count > 0:
292
+ logger.warning(
293
+ f"Cleaned up {cleaned_count} existing WebSocket server "
294
+ f"process(es). This may have been caused by improper "
295
+ f"shutdown in previous sessions."
296
+ )
297
+ await asyncio.sleep(0.5)
298
+
146
299
  async def start(self):
147
300
  """Start the WebSocket server and connect to it."""
148
- # Check if npm is installed
149
- npm_check = subprocess.run(
150
- ['npm', '--version'],
151
- capture_output=True,
152
- text=True,
153
- )
154
- if npm_check.returncode != 0:
155
- raise RuntimeError(
156
- "npm is not installed or not in PATH. "
157
- "Please install Node.js and npm from https://nodejs.org/ "
158
- "to use the hybrid browser toolkit."
159
- )
301
+ await self._cleanup_existing_processes()
160
302
 
161
- # Check if node is installed
162
- node_check = subprocess.run(
163
- ['node', '--version'],
164
- capture_output=True,
165
- text=True,
166
- )
167
- if node_check.returncode != 0:
168
- raise RuntimeError(
169
- "node is not installed or not in PATH. "
170
- "Please install Node.js from https://nodejs.org/ "
171
- "to use the hybrid browser toolkit."
172
- )
303
+ npm_cmd, node_cmd = await check_and_install_dependencies(self.ts_dir)
173
304
 
174
- # Check if node_modules exists (dependencies installed)
175
- node_modules_path = os.path.join(self.ts_dir, 'node_modules')
176
- if not os.path.exists(node_modules_path):
177
- logger.warning("Node modules not found. Running npm install...")
178
- install_result = subprocess.run(
179
- ['npm', 'install'],
180
- cwd=self.ts_dir,
181
- capture_output=True,
182
- text=True,
183
- )
184
- if install_result.returncode != 0:
185
- logger.error(f"npm install failed: {install_result.stderr}")
186
- raise RuntimeError(
187
- f"Failed to install npm dependencies: {install_result.stderr}\n" # noqa:E501
188
- f"Please run 'npm install' in {self.ts_dir} manually."
189
- )
190
- logger.info("npm dependencies installed successfully")
305
+ import platform
191
306
 
192
- # Ensure the TypeScript code is built
193
- build_result = subprocess.run(
194
- ['npm', 'run', 'build'],
195
- cwd=self.ts_dir,
196
- capture_output=True,
197
- text=True,
198
- )
199
- if build_result.returncode != 0:
200
- logger.error(f"TypeScript build failed: {build_result.stderr}")
201
- raise RuntimeError(
202
- f"TypeScript build failed: {build_result.stderr}"
203
- )
307
+ use_shell = platform.system() == 'Windows'
204
308
 
205
- # Start the WebSocket server
206
309
  self.process = subprocess.Popen(
207
- ['node', 'websocket-server.js'],
310
+ [node_cmd, 'websocket-server.js'],
208
311
  cwd=self.ts_dir,
209
312
  stdout=subprocess.PIPE,
210
- stderr=subprocess.PIPE,
313
+ stderr=subprocess.STDOUT,
211
314
  text=True,
315
+ encoding='utf-8',
316
+ bufsize=1,
317
+ shell=use_shell,
212
318
  )
213
319
 
214
- # Wait for server to output the port
320
+ self._server_ready_future = asyncio.get_running_loop().create_future()
321
+
322
+ self._log_reader_task = asyncio.create_task(
323
+ self._read_and_log_output()
324
+ )
325
+
326
+ if self.browser_log_to_file and self.ts_log_file_path:
327
+ logger.info(
328
+ f"TypeScript console logs will be written to: "
329
+ f"{self.ts_log_file_path}"
330
+ )
331
+
215
332
  server_ready = False
216
- timeout = 10 # 10 seconds timeout
217
- start_time = time.time()
333
+ timeout = 10
218
334
 
219
- while not server_ready and time.time() - start_time < timeout:
220
- if self.process.poll() is not None:
221
- # Process died
222
- stderr = self.process.stderr.read()
223
- raise RuntimeError(
224
- f"WebSocket server failed to start: {stderr}"
225
- )
335
+ try:
336
+ await asyncio.wait_for(self._server_ready_future, timeout=timeout)
337
+ server_ready = True
338
+ except asyncio.TimeoutError:
339
+ server_ready = False
340
+
341
+ if not server_ready:
342
+ await _cleanup_process_and_tasks(
343
+ self.process,
344
+ self._log_reader_task,
345
+ getattr(self, 'ts_log_file', None),
346
+ )
347
+ self.ts_log_file = None
348
+ self.process = None
226
349
 
350
+ error_msg = _create_memory_aware_error(
351
+ "WebSocket server failed to start within timeout"
352
+ )
353
+ raise RuntimeError(error_msg)
354
+
355
+ max_retries = 3
356
+ retry_delays = [1, 2, 4]
357
+
358
+ for attempt in range(max_retries):
227
359
  try:
228
- line = self.process.stdout.readline()
229
- if line.startswith('SERVER_READY:'):
230
- self.server_port = int(line.split(':')[1].strip())
231
- server_ready = True
232
- logger.info(
233
- f"WebSocket server ready on port {self.server_port}"
360
+ connect_timeout = 10.0 + (attempt * 5.0)
361
+
362
+ logger.info(
363
+ f"Attempting to connect to WebSocket server "
364
+ f"(attempt {attempt + 1}/{max_retries}, "
365
+ f"timeout: {connect_timeout}s)"
366
+ )
367
+
368
+ self.websocket = await asyncio.wait_for(
369
+ websockets.connect(
370
+ f"ws://localhost:{self.server_port}",
371
+ ping_interval=30,
372
+ ping_timeout=10,
373
+ max_size=50 * 1024 * 1024,
374
+ ),
375
+ timeout=connect_timeout,
376
+ )
377
+ logger.info("Connected to WebSocket server")
378
+ break
379
+
380
+ except asyncio.TimeoutError:
381
+ if attempt < max_retries - 1:
382
+ delay = retry_delays[attempt]
383
+ logger.warning(
384
+ f"WebSocket handshake timeout "
385
+ f"(attempt {attempt + 1}/{max_retries}). "
386
+ f"Retrying in {delay} seconds..."
387
+ )
388
+ await asyncio.sleep(delay)
389
+ else:
390
+ raise RuntimeError(
391
+ f"Failed to connect to WebSocket server after "
392
+ f"{max_retries} attempts: Handshake timeout"
234
393
  )
235
- except (ValueError, IndexError):
236
- continue
237
394
 
238
- if not server_ready:
239
- self.process.kill()
240
- raise RuntimeError(
241
- "WebSocket server failed to start within timeout"
395
+ except Exception as e:
396
+ if attempt < max_retries - 1 and "timed out" in str(e).lower():
397
+ delay = retry_delays[attempt]
398
+ logger.warning(
399
+ f"WebSocket connection failed "
400
+ f"(attempt {attempt + 1}/{max_retries}): {e}. "
401
+ f"Retrying in {delay} seconds..."
402
+ )
403
+ await asyncio.sleep(delay)
404
+ else:
405
+ break
406
+
407
+ if not self.websocket:
408
+ await _cleanup_process_and_tasks(
409
+ self.process,
410
+ self._log_reader_task,
411
+ getattr(self, 'ts_log_file', None),
242
412
  )
413
+ self.ts_log_file = None
414
+ self.process = None
243
415
 
244
- # Connect to the WebSocket server
245
- try:
246
- self.websocket = await websockets.connect(
247
- f"ws://localhost:{self.server_port}",
248
- ping_interval=30,
249
- ping_timeout=10,
250
- max_size=50 * 1024 * 1024, # 50MB limit to match server
416
+ error_msg = _create_memory_aware_error(
417
+ "Failed to connect to WebSocket server after multiple attempts"
251
418
  )
252
- logger.info("Connected to WebSocket server")
253
- except Exception as e:
254
- self.process.kill()
255
- raise RuntimeError(
256
- f"Failed to connect to WebSocket server: {e}"
257
- ) from e
419
+ raise RuntimeError(error_msg)
258
420
 
259
- # Start the background receiver task
260
421
  self._receive_task = asyncio.create_task(self._receive_loop())
261
422
 
262
- # Initialize the browser toolkit
263
423
  await self._send_command('init', self.config)
264
424
 
425
+ if self.config.get('cdpUrl'):
426
+ self._browser_opened = True
427
+
265
428
  async def stop(self):
266
429
  """Stop the WebSocket connection and server."""
267
- # Cancel the receiver task
268
- if self._receive_task and not self._receive_task.done():
269
- self._receive_task.cancel()
270
- try:
271
- await self._receive_task
272
- except asyncio.CancelledError:
273
- pass
274
-
275
430
  if self.websocket:
276
- try:
277
- await self._send_command('shutdown', {})
431
+ with contextlib.suppress(asyncio.TimeoutError, Exception):
432
+ await asyncio.wait_for(
433
+ self._send_command('shutdown', {}),
434
+ timeout=2.0,
435
+ )
436
+
437
+ with contextlib.suppress(Exception):
278
438
  await self.websocket.close()
439
+ self.websocket = None
440
+
441
+ self._browser_opened = False
442
+
443
+ # Gracefully stop the Node process before cancelling the log reader
444
+ if self.process:
445
+ try:
446
+ # give the process a short grace period to exit after shutdown
447
+ self.process.wait(timeout=2)
448
+ except subprocess.TimeoutExpired:
449
+ try:
450
+ self.process.terminate()
451
+ self.process.wait(timeout=3)
452
+ except subprocess.TimeoutExpired:
453
+ with contextlib.suppress(ProcessLookupError, Exception):
454
+ self.process.kill()
455
+ self.process.wait()
456
+ except Exception as e:
457
+ logger.warning(f"Error terminating process: {e}")
279
458
  except Exception as e:
280
- logger.warning(f"Error during websocket shutdown: {e}")
281
- finally:
282
- self.websocket = None
459
+ logger.warning(f"Error waiting for process: {e}")
460
+
461
+ # Now cancel background tasks (reader won't block on readline)
462
+ tasks_to_cancel = [
463
+ ('_receive_task', self._receive_task),
464
+ ('_log_reader_task', self._log_reader_task),
465
+ ]
466
+ for _, task in tasks_to_cancel:
467
+ if task and not task.done():
468
+ task.cancel()
469
+ with contextlib.suppress(asyncio.CancelledError):
470
+ await task
471
+
472
+ # Close TS log file if open
473
+ if getattr(self, 'ts_log_file', None):
474
+ with contextlib.suppress(Exception):
475
+ self.ts_log_file.close()
476
+ self.ts_log_file = None
477
+
478
+ # Ensure process handle cleared
479
+ self.process = None
480
+
481
+ async def disconnect_only(self):
482
+ """Disconnect WebSocket and stop server without closing the browser.
483
+
484
+ This is useful for CDP mode where the browser should remain open.
485
+ """
486
+ if self.websocket:
487
+ with contextlib.suppress(Exception):
488
+ await self.websocket.close()
489
+ self.websocket = None
490
+
491
+ self._browser_opened = False
283
492
 
493
+ # Stop the Node process
284
494
  if self.process:
285
495
  try:
496
+ # Send SIGTERM to gracefully shutdown
286
497
  self.process.terminate()
287
- self.process.wait(timeout=5)
498
+ self.process.wait(timeout=3)
288
499
  except subprocess.TimeoutExpired:
289
- self.process.kill()
290
- self.process.wait()
500
+ # Force kill if needed
501
+ with contextlib.suppress(ProcessLookupError, Exception):
502
+ self.process.kill()
503
+ self.process.wait()
291
504
  except Exception as e:
292
505
  logger.warning(f"Error terminating process: {e}")
293
- finally:
294
- self.process = None
506
+
507
+ # Cancel background tasks
508
+ tasks_to_cancel = [
509
+ ('_receive_task', self._receive_task),
510
+ ('_log_reader_task', self._log_reader_task),
511
+ ]
512
+ for _, task in tasks_to_cancel:
513
+ if task and not task.done():
514
+ task.cancel()
515
+ with contextlib.suppress(asyncio.CancelledError):
516
+ await task
517
+
518
+ # Close TS log file if open
519
+ if getattr(self, 'ts_log_file', None):
520
+ with contextlib.suppress(Exception):
521
+ self.ts_log_file.close()
522
+ self.ts_log_file = None
523
+
524
+ # Ensure process handle cleared
525
+ self.process = None
526
+
527
+ logger.info("WebSocket disconnected without closing browser")
295
528
 
296
529
  async def _log_action(
297
530
  self,
@@ -366,7 +599,16 @@ class WebSocketBrowserWrapper:
366
599
  except asyncio.CancelledError:
367
600
  break
368
601
  except Exception as e:
369
- logger.error(f"Error in receive loop: {e}")
602
+ # Check if it's a normal WebSocket close
603
+ if isinstance(e, websockets.exceptions.ConnectionClosed):
604
+ if e.code == 1000: # Normal closure
605
+ logger.debug(f"WebSocket closed normally: {e}")
606
+ else:
607
+ logger.warning(
608
+ f"WebSocket closed with code {e.code}: {e}"
609
+ )
610
+ else:
611
+ logger.error(f"Error in receive loop: {e}")
370
612
  # Notify all pending futures of the error
371
613
  for future in self._pending_responses.values():
372
614
  if not future.done():
@@ -379,16 +621,20 @@ class WebSocketBrowserWrapper:
379
621
  async def _ensure_connection(self) -> None:
380
622
  """Ensure WebSocket connection is alive."""
381
623
  if not self.websocket:
382
- raise RuntimeError("WebSocket not connected")
624
+ error_msg = _create_memory_aware_error("WebSocket not connected")
625
+ raise RuntimeError(error_msg)
383
626
 
384
627
  # Check if connection is still alive
385
628
  try:
386
- # Send a ping to check connection
387
- await self.websocket.ping()
629
+ # Send a ping and wait for the corresponding pong (bounded wait)
630
+ pong_waiter = await self.websocket.ping()
631
+ await asyncio.wait_for(pong_waiter, timeout=5.0)
388
632
  except Exception as e:
389
633
  logger.warning(f"WebSocket ping failed: {e}")
390
634
  self.websocket = None
391
- raise RuntimeError("WebSocket connection lost")
635
+
636
+ error_msg = _create_memory_aware_error("WebSocket connection lost")
637
+ raise RuntimeError(error_msg)
392
638
 
393
639
  async def _send_command(
394
640
  self, command: str, params: Dict[str, Any]
@@ -396,11 +642,15 @@ class WebSocketBrowserWrapper:
396
642
  """Send a command to the WebSocket server and get response."""
397
643
  await self._ensure_connection()
398
644
 
645
+ # Process params to ensure refs have 'e' prefix
646
+ params = self._process_refs_in_params(params)
647
+
399
648
  message_id = str(uuid.uuid4())
400
649
  message = {'id': message_id, 'command': command, 'params': params}
401
650
 
402
651
  # Create a future for this message
403
- future: asyncio.Future[Dict[str, Any]] = asyncio.Future()
652
+ loop = asyncio.get_running_loop()
653
+ future: asyncio.Future[Dict[str, Any]] = loop.create_future()
404
654
  self._pending_responses[message_id] = future
405
655
 
406
656
  try:
@@ -424,6 +674,16 @@ class WebSocketBrowserWrapper:
424
674
  except asyncio.TimeoutError:
425
675
  # Remove from pending if timeout
426
676
  self._pending_responses.pop(message_id, None)
677
+ # Special handling for shutdown command
678
+ if command == 'shutdown':
679
+ logger.debug(
680
+ "Shutdown command timeout is expected - "
681
+ "server may have closed before responding"
682
+ )
683
+ # Return a success response for shutdown
684
+ return {
685
+ 'message': 'Browser shutdown (no response received)'
686
+ }
427
687
  raise RuntimeError(
428
688
  f"Timeout waiting for response to command: {command}"
429
689
  )
@@ -437,6 +697,12 @@ class WebSocketBrowserWrapper:
437
697
  "close frame" in str(e)
438
698
  or "connection closed" in str(e).lower()
439
699
  ):
700
+ # Special handling for shutdown command
701
+ if command == 'shutdown':
702
+ logger.debug(
703
+ f"Connection closed during shutdown (expected): {e}"
704
+ )
705
+ return {'message': 'Browser shutdown (connection closed)'}
440
706
  logger.error(f"WebSocket connection closed unexpectedly: {e}")
441
707
  # Mark connection as closed
442
708
  self.websocket = None
@@ -457,17 +723,31 @@ class WebSocketBrowserWrapper:
457
723
  response = await self._send_command(
458
724
  'open_browser', {'startUrl': start_url}
459
725
  )
726
+ self._browser_opened = True
460
727
  return response
461
728
 
462
729
@action_logger
async def close_browser(self) -> str:
    """Close the browser and return the server's confirmation text.

    Sends the ``close_browser`` command and, once it has completed,
    records on the instance that no browser is open.

    Returns:
        str: The ``'message'`` entry of the server response.
    """
    result = await self._send_command('close_browser', {})
    # Only reached when the command did not raise, so the flag keeps its
    # previous value if closing failed.
    self._browser_opened = False
    return result['message']
467
735
 
468
736
@action_logger
async def visit_page(self, url: str) -> Dict[str, Any]:
    """Navigate to ``url``, opening the browser first when needed.

    When this wrapper has not recorded an open browser and the
    configuration carries no ``cdpUrl`` (non-CDP mode), ``open_browser``
    is awaited before the ``visit_page`` command is sent. In CDP mode
    the command is sent directly.

    Args:
        url (str): Address to load.

    Returns:
        Dict[str, Any]: The server response to ``visit_page``.
    """
    # ``or`` short-circuits: the config is only consulted when no browser
    # has been opened, and a truthy ``cdpUrl`` suppresses the auto-open.
    if not (self._browser_opened or self.config.get('cdpUrl')):
        logger.info("Browser not open, automatically opening browser...")
        await self.open_browser()

    return await self._send_command('visit_page', {'url': url})
473
753
 
@@ -503,6 +783,55 @@ class WebSocketBrowserWrapper:
503
783
 
504
784
  return ToolResult(text=response['text'], images=response['images'])
505
785
 
786
+ def _ensure_ref_prefix(self, ref: str) -> str:
787
+ """Ensure ref has proper prefix"""
788
+ if not ref:
789
+ return ref
790
+
791
+ # If ref is purely numeric, add 'e' prefix for main frame
792
+ if ref.isdigit():
793
+ return f'e{ref}'
794
+
795
+ return ref
796
+
797
+ def _process_refs_in_params(
798
+ self, params: Dict[str, Any]
799
+ ) -> Dict[str, Any]:
800
+ """Process parameters to ensure all refs have 'e' prefix."""
801
+ if not params:
802
+ return params
803
+
804
+ # Create a copy to avoid modifying the original
805
+ processed = params.copy()
806
+
807
+ # Handle direct ref parameters
808
+ if 'ref' in processed:
809
+ processed['ref'] = self._ensure_ref_prefix(processed['ref'])
810
+
811
+ # Handle from_ref and to_ref for drag operations
812
+ if 'from_ref' in processed:
813
+ processed['from_ref'] = self._ensure_ref_prefix(
814
+ processed['from_ref']
815
+ )
816
+ if 'to_ref' in processed:
817
+ processed['to_ref'] = self._ensure_ref_prefix(processed['to_ref'])
818
+
819
+ # Handle inputs array for type_multiple
820
+ if 'inputs' in processed and isinstance(processed['inputs'], list):
821
+ processed_inputs = []
822
+ for input_item in processed['inputs']:
823
+ if isinstance(input_item, dict) and 'ref' in input_item:
824
+ processed_input = input_item.copy()
825
+ processed_input['ref'] = self._ensure_ref_prefix(
826
+ input_item['ref']
827
+ )
828
+ processed_inputs.append(processed_input)
829
+ else:
830
+ processed_inputs.append(input_item)
831
+ processed['inputs'] = processed_inputs
832
+
833
+ return processed
834
+
506
835
  @action_logger
507
836
  async def click(self, ref: str) -> Dict[str, Any]:
508
837
  """Click an element."""
@@ -513,6 +842,16 @@ class WebSocketBrowserWrapper:
513
842
async def type(self, ref: str, text: str) -> Dict[str, Any]:
    """Type ``text`` into the element identified by ``ref``.

    Args:
        ref (str): Reference of the target element.
        text (str): Text to enter.

    Returns:
        Dict[str, Any]: The server response to the ``type`` command.
    """
    payload = {'ref': ref, 'text': text}
    response = await self._send_command('type', payload)
    # Log the response for debugging
    logger.debug(f"Type response for ref {ref}: {response}")
    return response
848
+
849
@action_logger
async def type_multiple(
    self, inputs: List[Dict[str, str]]
) -> Dict[str, Any]:
    """Type text into several elements with one command.

    The batch is sent under the ``'type'`` command name, with the list
    carried in the ``inputs`` field of the parameters.

    Args:
        inputs (List[Dict[str, str]]): One mapping per target element
            (presumably holding ``ref`` and ``text`` keys; confirm
            against the server protocol).

    Returns:
        Dict[str, Any]: The server response.
    """
    return await self._send_command('type', {'inputs': inputs})
517
856
 
518
857
  @action_logger
@@ -537,6 +876,31 @@ class WebSocketBrowserWrapper:
537
876
  response = await self._send_command('enter', {})
538
877
  return response
539
878
 
879
@action_logger
async def mouse_control(
    self, control: str, x: float, y: float
) -> Dict[str, Any]:
    """Perform a mouse action at the given coordinates.

    Args:
        control (str): Name of the mouse action, forwarded unchanged to
            the server (the accepted values are not visible here).
        x (float): Horizontal coordinate.
        y (float): Vertical coordinate.

    Returns:
        Dict[str, Any]: The server response to ``mouse_control``.
    """
    payload = {'control': control, 'x': x, 'y': y}
    return await self._send_command('mouse_control', payload)
888
+
889
@action_logger
async def mouse_drag(self, from_ref: str, to_ref: str) -> Dict[str, Any]:
    """Drag from one element and drop on another, addressed by ref ID.

    Args:
        from_ref (str): Reference of the element where the drag starts.
        to_ref (str): Reference of the element to drop onto.

    Returns:
        Dict[str, Any]: The server response to ``mouse_drag``.
    """
    payload = {'from_ref': from_ref, 'to_ref': to_ref}
    return await self._send_command('mouse_drag', payload)
897
+
898
@action_logger
async def press_key(self, keys: List[str]) -> Dict[str, Any]:
    """Press a key or a key combination.

    Args:
        keys (List[str]): Key names, forwarded unchanged to the server.

    Returns:
        Dict[str, Any]: The server response to ``press_key``.
    """
    return await self._send_command('press_key', {'keys': keys})
903
+
540
904
  @action_logger
541
905
  async def back(self) -> Dict[str, Any]:
542
906
  """Navigate back."""
@@ -571,6 +935,22 @@ class WebSocketBrowserWrapper:
571
935
  # Fallback if wrapped in an object
572
936
  return response.get('tabs', [])
573
937
 
938
@action_logger
async def console_view(self) -> List[Dict[str, Any]]:
    """Fetch the console entries of the current page.

    Returns:
        List[Dict[str, Any]]: The response itself when the server
            answers with a list; otherwise the response's ``'logs'``
            entry, or an empty list if that key is absent.
    """
    reply = await self._send_command('console_view', {})
    # The server may answer with a bare list or wrap it in an object.
    return reply if isinstance(reply, list) else reply.get('logs', [])
947
+
948
@action_logger
async def console_exec(self, code: str) -> Dict[str, Any]:
    """Execute JavaScript code and return the outcome.

    Args:
        code (str): JavaScript source, forwarded unchanged to the server.

    Returns:
        Dict[str, Any]: The server response to ``console_exec``.
    """
    return await self._send_command('console_exec', {'code': code})
953
+
574
954
  @action_logger
575
955
  async def wait_user(
576
956
  self, timeout_sec: Optional[float] = None
@@ -580,3 +960,73 @@ class WebSocketBrowserWrapper:
580
960
  'wait_user', {'timeout': timeout_sec}
581
961
  )
582
962
  return response
963
+
964
+ async def _read_and_log_output(self):
965
+ """Read stdout from Node.js process & handle SERVER_READY + logging."""
966
+ if not self.process:
967
+ return
968
+
969
+ try:
970
+ with contextlib.ExitStack() as stack:
971
+ if self.ts_log_file_path:
972
+ self.ts_log_file = stack.enter_context(
973
+ open(self.ts_log_file_path, 'w', encoding='utf-8')
974
+ )
975
+ self.ts_log_file.write(
976
+ f"TypeScript Console Log - Started at "
977
+ f"{time.strftime('%Y-%m-%d %H:%M:%S')}\n"
978
+ )
979
+ self.ts_log_file.write("=" * 80 + "\n")
980
+ self.ts_log_file.flush()
981
+
982
+ while self.process and self.process.poll() is None:
983
+ try:
984
+ line = (
985
+ await asyncio.get_running_loop().run_in_executor(
986
+ None, self.process.stdout.readline
987
+ )
988
+ )
989
+ if not line: # EOF
990
+ break
991
+
992
+ # Check for SERVER_READY message
993
+ if line.startswith('SERVER_READY:'):
994
+ try:
995
+ self.server_port = int(
996
+ line.split(':', 1)[1].strip()
997
+ )
998
+ logger.info(
999
+ f"WebSocket server ready on port "
1000
+ f"{self.server_port}"
1001
+ )
1002
+ if (
1003
+ self._server_ready_future
1004
+ and not self._server_ready_future.done()
1005
+ ):
1006
+ self._server_ready_future.set_result(True)
1007
+ except (ValueError, IndexError) as e:
1008
+ logger.error(
1009
+ f"Failed to parse SERVER_READY: {e}"
1010
+ )
1011
+
1012
+ # Write all output to log file
1013
+ if self.ts_log_file:
1014
+ timestamp = time.strftime('%H:%M:%S')
1015
+ self.ts_log_file.write(f"[{timestamp}] {line}")
1016
+ self.ts_log_file.flush()
1017
+
1018
+ except Exception as e:
1019
+ logger.warning(f"Error reading stdout: {e}")
1020
+ break
1021
+
1022
+ # Footer if we had a file
1023
+ if self.ts_log_file:
1024
+ self.ts_log_file.write("\n" + "=" * 80 + "\n")
1025
+ self.ts_log_file.write(
1026
+ f"TypeScript Console Log - Ended at "
1027
+ f"{time.strftime('%Y-%m-%d %H:%M:%S')}\n"
1028
+ )
1029
+ # ExitStack closes file; clear handle
1030
+ self.ts_log_file = None
1031
+ except Exception as e:
1032
+ logger.warning(f"Error in _read_and_log_output: {e}")