camel-ai 0.2.72a10__py3-none-any.whl → 0.2.73a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (36)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +113 -338
  3. camel/memories/agent_memories.py +18 -17
  4. camel/societies/workforce/prompts.py +10 -4
  5. camel/societies/workforce/single_agent_worker.py +7 -5
  6. camel/toolkits/__init__.py +4 -1
  7. camel/toolkits/base.py +57 -1
  8. camel/toolkits/hybrid_browser_toolkit/config_loader.py +136 -413
  9. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +796 -1631
  10. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +4356 -0
  11. camel/toolkits/hybrid_browser_toolkit/ts/package.json +33 -0
  12. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  13. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +916 -0
  14. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +226 -0
  15. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +522 -0
  16. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  17. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +110 -0
  18. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  19. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +210 -0
  20. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +533 -0
  21. camel/toolkits/message_integration.py +592 -0
  22. camel/toolkits/screenshot_toolkit.py +116 -31
  23. camel/toolkits/search_toolkit.py +20 -2
  24. camel/toolkits/terminal_toolkit.py +16 -2
  25. camel/toolkits/video_analysis_toolkit.py +13 -13
  26. camel/toolkits/video_download_toolkit.py +11 -11
  27. {camel_ai-0.2.72a10.dist-info → camel_ai-0.2.73a0.dist-info}/METADATA +10 -4
  28. {camel_ai-0.2.72a10.dist-info → camel_ai-0.2.73a0.dist-info}/RECORD +30 -24
  29. camel/toolkits/hybrid_browser_toolkit/actions.py +0 -417
  30. camel/toolkits/hybrid_browser_toolkit/agent.py +0 -311
  31. camel/toolkits/hybrid_browser_toolkit/browser_session.py +0 -740
  32. camel/toolkits/hybrid_browser_toolkit/snapshot.py +0 -227
  33. camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
  34. camel/toolkits/hybrid_browser_toolkit/unified_analyzer.js +0 -1002
  35. {camel_ai-0.2.72a10.dist-info → camel_ai-0.2.73a0.dist-info}/WHEEL +0 -0
  36. {camel_ai-0.2.72a10.dist-info → camel_ai-0.2.73a0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,533 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import asyncio
16
+ import datetime
17
+ import json
18
+ import os
19
+ import subprocess
20
+ import time
21
+ import uuid
22
+ from functools import wraps
23
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
24
+
25
+ if TYPE_CHECKING:
26
+ import websockets
27
+ else:
28
+ try:
29
+ import websockets
30
+ except ImportError:
31
+ websockets = None
32
+
33
+ from camel.logger import get_logger
34
+ from camel.utils.tool_result import ToolResult
35
+
36
+ logger = get_logger(__name__)
37
+
38
+
39
def action_logger(func):
    """Decorator to add logging to async action methods.

    The wrapped coroutine is awaited and its call is reported through
    ``self._log_action``, so the decorated method must live on an object
    that provides that coroutine.

    Args:
        func: The async method to wrap. It is called as
            ``func(self, *args, **kwargs)``.

    Returns:
        An async wrapper that returns whatever ``func`` returns and
        re-raises any exception ``func`` raises after logging it.
    """

    @wraps(func)
    async def wrapper(self, *args, **kwargs):
        action_name = func.__name__
        start_time = time.time()

        # Log inputs (skip self)
        inputs = {
            "args": args,
            "kwargs": kwargs,
        }

        # Only the wrapped call is guarded: a problem while *logging* a
        # successful action must not be recorded as a failure of the
        # action itself.
        try:
            result = await func(self, *args, **kwargs)
        except Exception as e:
            await self._log_action(
                action_name=action_name,
                inputs=inputs,
                outputs=None,
                execution_time=time.time() - start_time,
                error=f"{type(e).__name__}: {e!s}",
            )
            raise

        execution_time = time.time() - start_time

        # Extract page load time (reported in milliseconds) if available.
        # A missing or non-numeric value is ignored instead of turning a
        # successful action into a TypeError.
        page_load_time = None
        if isinstance(result, dict):
            load_ms = result.get('page_load_time_ms')
            if isinstance(load_ms, (int, float)):
                page_load_time = load_ms / 1000.0

        await self._log_action(
            action_name=action_name,
            inputs=inputs,
            outputs=result,
            execution_time=execution_time,
            page_load_time=page_load_time,
        )
        return result

    return wrapper
90
+
91
+
92
class WebSocketBrowserWrapper:
    """Python wrapper for the TypeScript hybrid browser
    toolkit implementation using WebSocket.

    ``start`` launches ``websocket-server.js`` from the ``ts`` directory
    next to this module as a Node.js child process, connects to the port
    the server announces and sends an ``init`` command. Every action
    method then forwards one JSON command of the form
    ``{'id': ..., 'command': ..., 'params': ...}`` and returns the
    ``result`` field of the matching response.
    """

    # Seconds to wait for the server to print its ``SERVER_READY:`` line.
    _SERVER_START_TIMEOUT = 10
    # Seconds to wait for each incoming message while awaiting a response.
    _RESPONSE_TIMEOUT = 60.0

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """Initialize the wrapper.

        Args:
            config: Configuration dictionary for the browser toolkit. The
                keys ``browser_log_to_file`` and ``session_id`` are read
                here; the whole dictionary is sent to the server with the
                ``init`` command.

        Raises:
            ImportError: If the ``websockets`` package is not installed.
        """
        if websockets is None:
            raise ImportError(
                "websockets package is required for WebSocket communication. "
                "Install with: pip install websockets"
            )

        self.config = config or {}
        self.ts_dir = os.path.join(os.path.dirname(__file__), 'ts')
        self.process: Optional[subprocess.Popen] = None
        self.websocket = None
        self.server_port = None

        # Logging configuration
        self.browser_log_to_file = self.config.get(
            'browser_log_to_file', False
        )
        self.session_id = self.config.get('session_id', 'default')
        self.log_file_path: Optional[str] = None
        self.log_buffer: List[Dict[str, Any]] = []

        # Set up log file if needed
        if self.browser_log_to_file:
            log_dir = "browser_log"
            os.makedirs(log_dir, exist_ok=True)
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            self.log_file_path = os.path.join(
                log_dir,
                f"hybrid_browser_toolkit_ws_{timestamp}_{self.session_id}.log",
            )

    async def __aenter__(self):
        """Async context manager entry."""
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit."""
        await self.stop()

    async def start(self):
        """Start the WebSocket server and connect to it.

        Raises:
            RuntimeError: If npm or node is unavailable, if installing or
                building the TypeScript code fails, if the server does not
                become ready in time, or if the connection cannot be
                established.
        """
        self._require_executable(
            'npm',
            "npm is not installed or not in PATH. "
            "Please install Node.js and npm from https://nodejs.org/ "
            "to use the hybrid browser toolkit.",
        )
        self._require_executable(
            'node',
            "node is not installed or not in PATH. "
            "Please install Node.js from https://nodejs.org/ "
            "to use the hybrid browser toolkit.",
        )
        self._prepare_typescript_server()

        # Start the WebSocket server
        # NOTE(review): stdout/stderr are pipes that are only read during
        # startup; a very chatty server could block once the OS pipe
        # buffer fills - confirm the server stays quiet after startup.
        self.process = subprocess.Popen(
            ['node', 'websocket-server.js'],
            cwd=self.ts_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        await self._wait_for_server_ready()

        # Connect to the WebSocket server
        try:
            self.websocket = await websockets.connect(
                f"ws://localhost:{self.server_port}",
                ping_interval=30,
                ping_timeout=10,
                max_size=50 * 1024 * 1024,  # 50MB limit to match server
            )
            logger.info("Connected to WebSocket server")
        except Exception as e:
            self._kill_process()
            raise RuntimeError(
                f"Failed to connect to WebSocket server: {e}"
            ) from e

        # Initialize the browser toolkit. When used via ``async with``,
        # ``__aexit__`` does not run if ``__aenter__`` raises, so a failed
        # init must release the socket and the child process here.
        try:
            await self._send_command('init', self.config)
        except Exception:
            await self._abort_startup()
            raise

    @staticmethod
    def _require_executable(name: str, missing_message: str) -> None:
        """Raise ``RuntimeError(missing_message)`` unless ``name --version``
        runs successfully."""
        try:
            check = subprocess.run(
                [name, '--version'],
                capture_output=True,
                text=True,
            )
        except OSError as e:
            # A missing executable raises FileNotFoundError (an OSError)
            # instead of producing a non-zero return code.
            raise RuntimeError(missing_message) from e
        if check.returncode != 0:
            raise RuntimeError(missing_message)

    def _prepare_typescript_server(self) -> None:
        """Install npm dependencies if absent and build the TypeScript code."""
        # Check if node_modules exists (dependencies installed)
        node_modules_path = os.path.join(self.ts_dir, 'node_modules')
        if not os.path.exists(node_modules_path):
            logger.warning("Node modules not found. Running npm install...")
            install_result = subprocess.run(
                ['npm', 'install'],
                cwd=self.ts_dir,
                capture_output=True,
                text=True,
            )
            if install_result.returncode != 0:
                install_err = install_result.stderr
                logger.error(f"npm install failed: {install_err}")
                raise RuntimeError(
                    f"Failed to install npm dependencies: {install_err}\n"
                    f"Please run 'npm install' in {self.ts_dir} manually."
                )
            logger.info("npm dependencies installed successfully")

        # Ensure the TypeScript code is built
        build_result = subprocess.run(
            ['npm', 'run', 'build'],
            cwd=self.ts_dir,
            capture_output=True,
            text=True,
        )
        if build_result.returncode != 0:
            logger.error(f"TypeScript build failed: {build_result.stderr}")
            raise RuntimeError(
                f"TypeScript build failed: {build_result.stderr}"
            )

    async def _wait_for_server_ready(self) -> None:
        """Wait until the server prints ``SERVER_READY:<port>`` on stdout.

        Sets ``self.server_port`` on success.

        Raises:
            RuntimeError: If the process exits first or the startup
                timeout elapses.
        """
        process = self.process
        if process is None or process.stdout is None:
            raise RuntimeError("WebSocket server process was not started")

        loop = asyncio.get_running_loop()
        deadline = time.time() + self._SERVER_START_TIMEOUT

        while True:
            if process.poll() is not None:
                # Process died
                stderr = process.stderr.read() if process.stderr else ''
                self.process = None
                raise RuntimeError(
                    f"WebSocket server failed to start: {stderr}"
                )

            remaining = deadline - time.time()
            if remaining <= 0:
                break

            # ``readline`` blocks until a full line arrives, so it runs in
            # a worker thread and is bounded by the remaining time;
            # otherwise a silent server would hang startup forever.
            try:
                line = await asyncio.wait_for(
                    loop.run_in_executor(None, process.stdout.readline),
                    timeout=remaining,
                )
            except asyncio.TimeoutError:
                break

            if not line:
                # EOF: the process is exiting; give ``poll`` a moment to
                # observe it instead of spinning.
                await asyncio.sleep(0.05)
                continue
            if not line.startswith('SERVER_READY:'):
                continue
            try:
                self.server_port = int(line.split(':')[1].strip())
            except (ValueError, IndexError):
                continue
            logger.info(f"WebSocket server ready on port {self.server_port}")
            return

        # Killing the process also unblocks a reader thread that may still
        # be waiting on its stdout.
        self._kill_process()
        raise RuntimeError("WebSocket server failed to start within timeout")

    def _kill_process(self) -> None:
        """Kill and reap the server process, then forget it."""
        process, self.process = self.process, None
        if process is None:
            return
        try:
            process.kill()
            process.wait()
        except Exception as e:
            logger.warning(f"Error terminating process: {e}")

    async def _abort_startup(self) -> None:
        """Release the socket and the server process after a failed start."""
        ws, self.websocket = self.websocket, None
        if ws is not None:
            try:
                await ws.close()
            except Exception as e:
                logger.warning(f"Error during websocket shutdown: {e}")
        self._kill_process()

    async def stop(self):
        """Stop the WebSocket connection and server.

        Errors during shutdown are logged as warnings, not raised.
        """
        ws = self.websocket
        if ws:
            try:
                await self._send_command('shutdown', {})
            except Exception as e:
                logger.warning(f"Error during websocket shutdown: {e}")
            finally:
                self.websocket = None
                # Close the socket even when the shutdown command failed,
                # so the connection is not leaked.
                try:
                    await ws.close()
                except Exception as e:
                    logger.warning(f"Error during websocket shutdown: {e}")

        if self.process:
            try:
                self.process.terminate()
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.process.kill()
                self.process.wait()
            except Exception as e:
                logger.warning(f"Error terminating process: {e}")
            finally:
                self.process = None

    async def _log_action(
        self,
        action_name: str,
        inputs: Dict[str, Any],
        outputs: Any,
        execution_time: float,
        page_load_time: Optional[float] = None,
        error: Optional[str] = None,
    ) -> None:
        """Append one JSON log entry describing an action to the log file.

        Does nothing unless file logging is enabled and a log path is set.

        Args:
            action_name: Name of the action that ran.
            inputs: Arguments the action was called with.
            outputs: Value the action returned; ignored when ``error`` is
                given.
            execution_time: Duration of the action in seconds.
            page_load_time: Page load duration in seconds, if known.
            error: Error description if the action failed.
        """
        if not self.browser_log_to_file or not self.log_file_path:
            return

        # Create log entry
        log_entry: Dict[str, Any] = {
            "timestamp": datetime.datetime.now().isoformat(),
            "session_id": self.session_id,
            "action": action_name,
            "execution_time_ms": round(execution_time * 1000, 2),
            "inputs": inputs,
        }

        if error:
            log_entry["error"] = error
        elif hasattr(outputs, 'text') and hasattr(outputs, 'images'):
            # ToolResult-like object: log the text and only the number of
            # images so the entry stays JSON serializable.
            log_entry["outputs"] = {
                "text": outputs.text,
                "images_count": len(outputs.images) if outputs.images else 0,
            }
        else:
            log_entry["outputs"] = outputs

        if page_load_time is not None:
            log_entry["page_load_time_ms"] = round(page_load_time * 1000, 2)

        # Write to log file; serialization or I/O failures are reported
        # through the logger and never propagate to the caller.
        try:
            with open(self.log_file_path, 'a', encoding='utf-8') as f:
                f.write(
                    json.dumps(log_entry, ensure_ascii=False, indent=2) + '\n'
                )
        except Exception as e:
            logger.error(f"Failed to write to log file: {e}")

    async def _ensure_connection(self) -> None:
        """Ensure WebSocket connection is alive.

        Raises:
            RuntimeError: If there is no connection or sending a ping
                fails, in which case the connection is also discarded.
        """
        if not self.websocket:
            raise RuntimeError("WebSocket not connected")

        # Check if connection is still alive
        try:
            # Send a ping to check connection
            await self.websocket.ping()
        except Exception as e:
            logger.warning(f"WebSocket ping failed: {e}")
            self.websocket = None
            raise RuntimeError("WebSocket connection lost") from e

    async def _send_command(
        self, command: str, params: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Send a command to the WebSocket server and get response.

        Args:
            command: Name of the server command.
            params: Parameters sent along with the command.

        Returns:
            The ``result`` field of the response whose ``id`` matches the
            request.

        Raises:
            RuntimeError: If the connection is unavailable or lost, the
                server reports failure, or no message arrives within the
                response timeout.
        """
        await self._ensure_connection()

        message_id = str(uuid.uuid4())
        message = {'id': message_id, 'command': command, 'params': params}

        try:
            # Send command
            if self.websocket is None:
                raise RuntimeError("WebSocket connection not established")
            await self.websocket.send(json.dumps(message))

            # Wait for response with matching ID; messages carrying a
            # different id are skipped.
            while True:
                try:
                    if self.websocket is None:
                        raise RuntimeError("WebSocket connection lost")
                    response_data = await asyncio.wait_for(
                        self.websocket.recv(),
                        timeout=self._RESPONSE_TIMEOUT,
                    )
                    response = json.loads(response_data)

                    # Check if this is the response we're waiting for
                    if response.get('id') == message_id:
                        if not response.get('success'):
                            raise RuntimeError(
                                f"Command failed: {response.get('error')}"
                            )
                        return response['result']

                except asyncio.TimeoutError:
                    raise RuntimeError(
                        f"Timeout waiting for response to command: {command}"
                    )
                except json.JSONDecodeError as e:
                    logger.warning(f"Failed to decode WebSocket response: {e}")
                    continue

        except Exception as e:
            # Check if it's a connection closed error (detected from the
            # exception text).
            error_text = str(e)
            if (
                "close frame" in error_text
                or "connection closed" in error_text.lower()
            ):
                logger.error(f"WebSocket connection closed unexpectedly: {e}")
                # Mark connection as closed
                self.websocket = None
                raise RuntimeError(
                    f"WebSocket connection lost "
                    f"during {command} operation: {e}"
                ) from e
            else:
                logger.error(f"WebSocket communication error: {e}")
                raise

    # Browser action methods
    @action_logger
    async def open_browser(
        self, start_url: Optional[str] = None
    ) -> Dict[str, Any]:
        """Open browser."""
        return await self._send_command(
            'open_browser', {'startUrl': start_url}
        )

    @action_logger
    async def close_browser(self) -> str:
        """Close browser."""
        response = await self._send_command('close_browser', {})
        return response['message']

    @action_logger
    async def visit_page(self, url: str) -> Dict[str, Any]:
        """Visit a page."""
        return await self._send_command('visit_page', {'url': url})

    @action_logger
    async def get_page_snapshot(self, viewport_limit: bool = False) -> str:
        """Get page snapshot."""
        response = await self._send_command(
            'get_page_snapshot', {'viewport_limit': viewport_limit}
        )
        # The backend returns the snapshot string directly,
        # not wrapped in an object
        if isinstance(response, str):
            return response
        # Fallback if wrapped in an object
        return response.get('snapshot', '')

    @action_logger
    async def get_snapshot_for_ai(self) -> Dict[str, Any]:
        """Get snapshot for AI with element details."""
        return await self._send_command('get_snapshot_for_ai', {})

    @action_logger
    async def get_som_screenshot(self) -> ToolResult:
        """Get screenshot."""
        logger.info("Requesting screenshot via WebSocket...")
        start_time = time.time()

        response = await self._send_command('get_som_screenshot', {})

        end_time = time.time()
        logger.info(f"Screenshot completed in {end_time - start_time:.2f}s")

        return ToolResult(text=response['text'], images=response['images'])

    @action_logger
    async def click(self, ref: str) -> Dict[str, Any]:
        """Click an element."""
        return await self._send_command('click', {'ref': ref})

    @action_logger
    async def type(self, ref: str, text: str) -> Dict[str, Any]:
        """Type text into an element."""
        return await self._send_command('type', {'ref': ref, 'text': text})

    @action_logger
    async def select(self, ref: str, value: str) -> Dict[str, Any]:
        """Select an option."""
        return await self._send_command(
            'select', {'ref': ref, 'value': value}
        )

    @action_logger
    async def scroll(self, direction: str, amount: int) -> Dict[str, Any]:
        """Scroll the page."""
        return await self._send_command(
            'scroll', {'direction': direction, 'amount': amount}
        )

    @action_logger
    async def enter(self) -> Dict[str, Any]:
        """Press enter."""
        return await self._send_command('enter', {})

    @action_logger
    async def back(self) -> Dict[str, Any]:
        """Navigate back."""
        return await self._send_command('back', {})

    @action_logger
    async def forward(self) -> Dict[str, Any]:
        """Navigate forward."""
        return await self._send_command('forward', {})

    @action_logger
    async def switch_tab(self, tab_id: str) -> Dict[str, Any]:
        """Switch to a tab."""
        return await self._send_command('switch_tab', {'tabId': tab_id})

    @action_logger
    async def close_tab(self, tab_id: str) -> Dict[str, Any]:
        """Close a tab."""
        return await self._send_command('close_tab', {'tabId': tab_id})

    @action_logger
    async def get_tab_info(self) -> List[Dict[str, Any]]:
        """Get tab information."""
        response = await self._send_command('get_tab_info', {})
        # The backend returns the tab list directly, not wrapped in an object
        if isinstance(response, list):
            return response
        # Fallback if wrapped in an object
        return response.get('tabs', [])

    @action_logger
    async def wait_user(
        self, timeout_sec: Optional[float] = None
    ) -> Dict[str, Any]:
        """Wait for user input."""
        # NOTE(review): each response wait in ``_send_command`` is capped
        # by the response timeout; confirm that longer ``timeout_sec``
        # values are handled as intended.
        return await self._send_command(
            'wait_user', {'timeout': timeout_sec}
        )